diff --git a/Makefile b/Makefile index 1fbdebd0..2c44f5b8 100644 --- a/Makefile +++ b/Makefile @@ -210,7 +210,7 @@ manifests-common: $(KUSTOMIZE) manifests-monolithic-mode: $(KUSTOMIZE) $(info ******************** generates monolithic-mode manifests ********************) @$(KUSTOMIZE) build --enable-helm kubernetes/monolithic-mode/logs > kubernetes/monolithic-mode/logs/k8s-all-in-one.yaml - @$(KUSTOMIZE) build kubernetes/monolithic-mode/metrics > kubernetes/monolithic-mode/metrics/k8s-all-in-one.yaml + @$(KUSTOMIZE) build --enable-helm kubernetes/monolithic-mode/metrics > kubernetes/monolithic-mode/metrics/k8s-all-in-one.yaml @$(KUSTOMIZE) build --enable-helm kubernetes/monolithic-mode/profiles > kubernetes/monolithic-mode/profiles/k8s-all-in-one.yaml @$(KUSTOMIZE) build --enable-helm kubernetes/monolithic-mode/traces > kubernetes/monolithic-mode/traces/k8s-all-in-one.yaml @$(KUSTOMIZE) build --enable-helm kubernetes/monolithic-mode/all-in-one > kubernetes/monolithic-mode/all-in-one/k8s-all-in-one.yaml @@ -218,7 +218,7 @@ manifests-monolithic-mode: $(KUSTOMIZE) manifests-read-write-mode: $(KUSTOMIZE) $(info ******************** generates read-write-mode manifests ********************) @$(KUSTOMIZE) build --enable-helm kubernetes/read-write-mode/logs > kubernetes/read-write-mode/logs/k8s-all-in-one.yaml - @$(KUSTOMIZE) build kubernetes/read-write-mode/metrics > kubernetes/read-write-mode/metrics/k8s-all-in-one.yaml + @$(KUSTOMIZE) build --enable-helm kubernetes/read-write-mode/metrics > kubernetes/read-write-mode/metrics/k8s-all-in-one.yaml manifests-microservices-mode: $(KUSTOMIZE) $(info ******************** generates microservices-mode manifests ********************) @@ -248,6 +248,9 @@ deploy-minio: @$(KUSTOMIZE) build --enable-helm kubernetes/common/minio-operator | kubectl apply -f - kubectl rollout status -n minio-system deployment/minio-operator --watch --timeout=600s @$(KUSTOMIZE) build --enable-helm kubernetes/common/minio-tenant | kubectl apply -f - + 
@echo "Waiting for Minio to be ready..." + @sleep 20 + kubectl rollout status -n minio-system statefulset/codelab-pool-10gb --watch --timeout=600s || true delete-minio: @$(KUSTOMIZE) build --enable-helm kubernetes/common/minio-tenant | kubectl delete --ignore-not-found -f - @@ -261,9 +264,7 @@ deploy-gateway: deploy-grafana: deploy-prometheus-operator-crds deploy-minio deploy-gateway $(info ******************** deploy grafana manifests ********************) @$(KUSTOMIZE) build --enable-helm kubernetes/common/grafana | kubectl apply -f - - @$(KUSTOMIZE) build --enable-helm kubernetes/common/grafana-agent | kubectl apply -f - delete-grafana: - @$(KUSTOMIZE) build --enable-helm kubernetes/common/grafana-agent | kubectl delete --ignore-not-found -f - @$(KUSTOMIZE) build --enable-helm kubernetes/common/grafana | kubectl delete --ignore-not-found -f - define echo_info @@ -277,7 +278,8 @@ define config_changes_trigger_pod_restart $(eval $@_MSG = $(1)) @kubectl rollout restart deployment -n gateway nginx kubectl rollout status -n gateway deployment/nginx --watch --timeout=600s - @kubectl rollout restart daemonset -n monitoring-system grafana-agent + @echo "Provisioning Grafana dashboards Prometheus rules and alerts..." 
+ @$(KUSTOMIZE) build monitoring-mixins | kubectl apply -f - kubectl rollout status -n monitoring-system daemonset/grafana-agent --watch --timeout=600s @$(call echo_info, ${$@_MSG}) endef diff --git a/kubernetes/common/grafana-agent/kustomization.yaml b/kubernetes/common/grafana-agent/kustomization.yaml index becfddd4..429fbd8d 100644 --- a/kubernetes/common/grafana-agent/kustomization.yaml +++ b/kubernetes/common/grafana-agent/kustomization.yaml @@ -15,8 +15,6 @@ helmCharts: configMapGenerator: - name: agent-config namespace: monitoring-system - options: - disableNameSuffixHash: true files: - configs/config.river diff --git a/kubernetes/common/grafana-agent/manifests/k8s-all-in-one.yaml b/kubernetes/common/grafana-agent/manifests/k8s-all-in-one.yaml index 299cc7e2..ca87c17a 100644 --- a/kubernetes/common/grafana-agent/manifests/k8s-all-in-one.yaml +++ b/kubernetes/common/grafana-agent/manifests/k8s-all-in-one.yaml @@ -139,7 +139,7 @@ data: \"info\")\n\tformat = \"logfmt\"\n}\n" kind: ConfigMap metadata: - name: agent-config + name: agent-config-6d74m77mfd namespace: monitoring-system --- apiVersion: v1 @@ -836,7 +836,7 @@ spec: serviceAccountName: grafana-agent volumes: - configMap: - name: agent-config + name: agent-config-6d74m77mfd name: config - hostPath: path: /var/log diff --git a/kubernetes/common/grafana/configs/datasources.yaml b/kubernetes/common/grafana/configs/datasources.yaml index 8b1cd660..15e287fe 100644 --- a/kubernetes/common/grafana/configs/datasources.yaml +++ b/kubernetes/common/grafana/configs/datasources.yaml @@ -16,41 +16,99 @@ datasources: type: prometheus uid: metrics access: proxy - url: http://nginx.gateway.svc.cluster.local:8080/prometheus + orgId: 1 + url: http://nginx.gateway.svc.cluster.local.:8080/prometheus basicAuth: false - isDefault: false + isDefault: true version: 1 - editable: true + editable: false + jsonData: + prometheusType: Mimir + exemplarTraceIdDestinations: + - name: traceID + datasourceUid: traces + # Loki for logs 
- name: Logs type: loki - uid: logs access: proxy - url: http://nginx.gateway.svc.cluster.local:3100 + orgId: 1 + uid: logs + url: http://nginx.gateway.svc.cluster.local.:3100 basicAuth: false isDefault: false version: 1 editable: true + jsonData: + derivedFields: + - datasourceUid: traces + matcherRegex: "[tT]race_?[iI][dD]\"?[:=]\"?(\\w+)" + name: traceID + url: $${__value.raw} +# https://grafana.com/docs/grafana/latest/datasources/tempo/configure-tempo-data-source/#provision-the-data-source # Tempo for traces - name: Traces type: tempo access: proxy + orgId: 1 uid: traces - url: http://nginx.gateway.svc.cluster.local:3200 + url: http://nginx.gateway.svc.cluster.local.:3200 basicAuth: false isDefault: false version: 1 editable: true apiVersion: 1 + jsonData: + search: + hide: false + lokiSearch: + datasourceUid: logs + nodeGraph: + enabled: true + serviceMap: + datasourceUid: metrics + traceQuery: + timeShiftEnabled: true + spanStartTimeShift: '-30m' + spanEndTimeShift: '30m' + spanBar: + type: 'Tag' + tag: 'http.path' + tracesToMetrics: + datasourceUid: metrics + spanStartTimeShift: '-30m' + spanEndTimeShift: '30m' + tags: [{ key: 'service.name', value: 'service' }] + queries: + - name: '(R) Rate' + query: 'sum(rate(traces_spanmetrics_calls_total{$$__tags}[$$__rate_interval]))' + - name: '(E) Error Rate' + query: 'sum(rate(traces_spanmetrics_calls_total{$$__tags, status_code="STATUS_CODE_ERROR"}[$$__rate_interval]))' + - name: '(D) Duration' + query: 'histogram_quantile(0.9, sum(rate(traces_spanmetrics_latency_bucket{$$__tags}[$$__rate_interval])) by (le))' + tracesToLogsV2: + datasourceUid: logs + spanStartTimeShift: '-30m' + spanEndTimeShift: '30m' + tags: [{ key: 'app', value: 'app' }] + filterByTraceID: false + filterBySpanID: false + tracesToProfiles: + customQuery: false + datasourceUid: "profiles" + profileTypeId: "process_cpu:cpu:nanoseconds:cpu:nanoseconds" + tags: [{ key: 'app', value: 'service_name' }] + # Pyroscope for profiles - name: Profiles type: 
grafana-pyroscope-datasource - uid: profiles access: proxy - url: http://nginx.gateway.svc.cluster.local:4040 + orgId: 1 + uid: profiles + url: http://nginx.gateway.svc.cluster.local.:4040 basicAuth: false isDefault: false version: 1 diff --git a/kubernetes/common/grafana/manifests/k8s-all-in-one.yaml b/kubernetes/common/grafana/manifests/k8s-all-in-one.yaml index ce46f491..82228af1 100644 --- a/kubernetes/common/grafana/manifests/k8s-all-in-one.yaml +++ b/kubernetes/common/grafana/manifests/k8s-all-in-one.yaml @@ -182,41 +182,99 @@ data: type: prometheus uid: metrics access: proxy - url: http://nginx.gateway.svc.cluster.local:8080/prometheus + orgId: 1 + url: http://nginx.gateway.svc.cluster.local.:8080/prometheus basicAuth: false - isDefault: false + isDefault: true version: 1 - editable: true + editable: false + jsonData: + prometheusType: Mimir + exemplarTraceIdDestinations: + - name: traceID + datasourceUid: traces + # Loki for logs - name: Logs type: loki - uid: logs access: proxy - url: http://nginx.gateway.svc.cluster.local:3100 + orgId: 1 + uid: logs + url: http://nginx.gateway.svc.cluster.local.:3100 basicAuth: false isDefault: false version: 1 editable: true + jsonData: + derivedFields: + - datasourceUid: traces + matcherRegex: "[tT]race_?[iI][dD]\"?[:=]\"?(\\w+)" + name: traceID + url: $${__value.raw} + # https://grafana.com/docs/grafana/latest/datasources/tempo/configure-tempo-data-source/#provision-the-data-source # Tempo for traces - name: Traces type: tempo access: proxy + orgId: 1 uid: traces - url: http://nginx.gateway.svc.cluster.local:3200 + url: http://nginx.gateway.svc.cluster.local.:3200 basicAuth: false isDefault: false version: 1 editable: true apiVersion: 1 + jsonData: + search: + hide: false + lokiSearch: + datasourceUid: logs + nodeGraph: + enabled: true + serviceMap: + datasourceUid: metrics + traceQuery: + timeShiftEnabled: true + spanStartTimeShift: '-30m' + spanEndTimeShift: '30m' + spanBar: + type: 'Tag' + tag: 'http.path' + 
tracesToMetrics: + datasourceUid: metrics + spanStartTimeShift: '-30m' + spanEndTimeShift: '30m' + tags: [{ key: 'service.name', value: 'service' }] + queries: + - name: '(R) Rate' + query: 'sum(rate(traces_spanmetrics_calls_total{$$__tags}[$$__rate_interval]))' + - name: '(E) Error Rate' + query: 'sum(rate(traces_spanmetrics_calls_total{$$__tags, status_code="STATUS_CODE_ERROR"}[$$__rate_interval]))' + - name: '(D) Duration' + query: 'histogram_quantile(0.9, sum(rate(traces_spanmetrics_latency_bucket{$$__tags}[$$__rate_interval])) by (le))' + tracesToLogsV2: + datasourceUid: logs + spanStartTimeShift: '-30m' + spanEndTimeShift: '30m' + tags: [{ key: 'app', value: 'app' }] + filterByTraceID: false + filterBySpanID: false + tracesToProfiles: + customQuery: false + datasourceUid: "profiles" + profileTypeId: "process_cpu:cpu:nanoseconds:cpu:nanoseconds" + tags: [{ key: 'app', value: 'service_name' }] + # Pyroscope for profiles - name: Profiles type: grafana-pyroscope-datasource - uid: profiles access: proxy - url: http://nginx.gateway.svc.cluster.local:4040 + orgId: 1 + uid: profiles + url: http://nginx.gateway.svc.cluster.local.:4040 basicAuth: false isDefault: false version: 1 @@ -226,7 +284,7 @@ kind: ConfigMap metadata: labels: grafana_datasource: "1" - name: grafana-datasources-gcc7kf6bh4 + name: grafana-datasources-22t6t9c8f8 namespace: monitoring-system --- apiVersion: v1 diff --git a/kubernetes/common/memcached/kustomization.yaml b/kubernetes/common/memcached/kustomization.yaml index 579d5eef..28b928c8 100644 --- a/kubernetes/common/memcached/kustomization.yaml +++ b/kubernetes/common/memcached/kustomization.yaml @@ -14,7 +14,6 @@ helmCharts: resources: - namespace.yaml -- ../../../monitoring-mixins/memcached-mixin/deploy # # only if auth.enabled=true # secretGenerator: diff --git a/kubernetes/common/memcached/manifests/k8s-all-in-one.yaml b/kubernetes/common/memcached/manifests/k8s-all-in-one.yaml index 912ad04a..751dc276 100644 --- 
a/kubernetes/common/memcached/manifests/k8s-all-in-one.yaml +++ b/kubernetes/common/memcached/manifests/k8s-all-in-one.yaml @@ -17,1288 +17,6 @@ metadata: namespace: memcached-system --- apiVersion: v1 -data: - memcached-overview.json: | - { - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(memcached_commands_total{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\", command=\"get\", status=\"hit\"}[$__rate_interval])) / sum(rate(memcached_commands_total{cluster=~\"$cluster\", job=~\"$job\", command=\"get\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Hit Rate", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Hit Rate", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - 
"dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "topk(20,\n max by (cluster, job, instance) (\n memcached_current_connections{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"} / memcached_max_connections{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"}\n))\n", - "format": "time_series", - "legendFormat": "{{ cluster }} / {{ job }} / {{ instance }}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top 20 Highest Connection Usage", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Hits", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": 
false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(command, status) (rate(memcached_commands_total{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{command}} {{status}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Commands", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(instance) (rate(memcached_items_evicted_total{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{instance}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Evictions", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": 
"time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(instance) (rate(memcached_items_total{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{instance}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Stored", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ops", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 6, - "legend": { - "avg": false, - "current": false, 
- "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (instance) (\n rate(memcached_process_user_cpu_seconds_total{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) +\n rate(memcached_process_system_cpu_seconds_total{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "{{instance}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "CPU", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(instance) (memcached_current_bytes{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"})", - "format": 
"time_series", - "legendFormat": "{{instance}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Memory", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(instance) (memcached_current_items{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"})", - "format": "time_series", - "legendFormat": "{{instance}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Items", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": 
"Resources", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(instance) (memcached_current_connections{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"})", - "format": "time_series", - "legendFormat": "{{instance}}", - "legendLink": null - }, - { - "expr": "min(memcached_max_connections{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"})", - "format": "time_series", - "legendFormat": "Max Connections (min setting across all instances)", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Current Connections", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": 
"null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(instance) (rate(memcached_connections_total{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{instance}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Connections / sec", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 11, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(instance) (rate(memcached_read_bytes_total{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{instance}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Bytes received", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - 
"buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 12, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(instance) (rate(memcached_written_bytes_total{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{instance}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Bytes transmitted", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 13, - 
"legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "Count", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "hidden", - "unit": "short" - }, - { - "alias": "Uptime", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #B", - "thresholds": [ ], - "type": "number", - "unit": "dtdurations" - }, - { - "alias": "Instance", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "instance", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "Job", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "job", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "Version", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "version", - "thresholds": [ ], - 
"type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "count by (job, instance, version) (memcached_version{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"})", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - }, - { - "expr": "max by (job, instance) (memcached_uptime_seconds{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"})", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "B" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Memcached Info", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memcached Info", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": 
"label_values(memcached_commands_total, cluster)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "job", - "multi": true, - "name": "job", - "options": [ ], - "query": "label_values(memcached_commands_total{cluster=~\"$cluster\"}, job)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "instance", - "multi": true, - "name": "instance", - "options": [ ], - "query": "label_values(memcached_commands_total{cluster=~\"$cluster\",job=~\"$job\"}, instance)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Memcached Overview", - "uid": "124d5222454213f748dbfaf69b77ec48", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Memcached - labels: - grafana_dashboard: "1" - name: memcached-overview.json - namespace: monitoring-system ---- -apiVersion: v1 kind: Service metadata: labels: @@ -1430,46 +148,6 @@ spec: - emptyDir: {} name: empty-dir --- -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: memcached-mixin-alerts - namespace: 
monitoring-system -spec: - groups: - - name: memcached - rules: - - alert: MemcachedDown - annotations: - description: Memcached instance {{ $labels.job }} / {{ $labels.instance }} - is down for more than 15 minutes. - summary: Memcached instance is down. - expr: | - memcached_up == 0 - for: 15m - labels: - severity: critical - - alert: MemcachedConnectionLimitApproaching - annotations: - description: Memcached instance {{ $labels.job }} / {{ $labels.instance }} - connection usage is at {{ printf "%0.0f" $value }}% for at least 15 minutes. - summary: Memcached max connection limit is approaching. - expr: | - (memcached_current_connections / memcached_max_connections * 100) > 80 - for: 15m - labels: - severity: warning - - alert: MemcachedConnectionLimitApproaching - annotations: - description: Memcached instance {{ $labels.job }} / {{ $labels.instance }} - connection usage is at {{ printf "%0.0f" $value }}% for at least 15 minutes. - summary: Memcached connections at critical level. - expr: | - (memcached_current_connections / memcached_max_connections * 100) > 95 - for: 15m - labels: - severity: critical ---- apiVersion: networking.k8s.io/v1 kind: NetworkPolicy metadata: diff --git a/kubernetes/microservices-mode/logs/configs/config.river b/kubernetes/microservices-mode/logs/configs/config.river index f7fb6921..ffb94479 100644 --- a/kubernetes/microservices-mode/logs/configs/config.river +++ b/kubernetes/microservices-mode/logs/configs/config.river @@ -8,6 +8,9 @@ logging { format = "logfmt" } +/******************************************** + * Grafana LGTMP Stack Receiver Provider + ********************************************/ module.file "lgtmp" { filename = coalesce(env("AGENT_CONFIG_FOLDER"), "/etc/agent-modules") + "/lgtmp.river" @@ -34,6 +37,18 @@ module.file "logs_primary" { } } +/******************************************** + * Metrics + ********************************************/ +module.file "metrics_primary" { + filename = 
coalesce(env("AGENT_CONFIG_FOLDER"), "/etc/agent-modules") + "/metrics.river" + + arguments { + forward_to = [module.file.lgtmp.exports.metrics_receiver] + clustering = true + } +} + /******************************************** * Agent Integrations ********************************************/ diff --git a/kubernetes/microservices-mode/logs/configs/grafana-datasources-loki.yaml b/kubernetes/microservices-mode/logs/configs/grafana-datasources-loki.yaml deleted file mode 100644 index 46f535f3..00000000 --- a/kubernetes/microservices-mode/logs/configs/grafana-datasources-loki.yaml +++ /dev/null @@ -1,17 +0,0 @@ -apiVersion: 1 - -deleteDatasources: -- name: Logs - uid: logs - -datasources: -# Loki for logs -- name: Logs - type: loki - uid: logs - access: proxy - url: http://nginx.gateway.svc.cluster.local:3100 - basicAuth: false - isDefault: true - version: 1 - editable: true diff --git a/kubernetes/microservices-mode/logs/k8s-all-in-one.yaml b/kubernetes/microservices-mode/logs/k8s-all-in-one.yaml index 3ba684fe..a9e7c3a6 100644 --- a/kubernetes/microservices-mode/logs/k8s-all-in-one.yaml +++ b/kubernetes/microservices-mode/logs/k8s-all-in-one.yaml @@ -17,6 +17,151 @@ metadata: namespace: logging-system --- apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/version: 2.11.0 + name: mimir + namespace: monitoring-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + 
app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent +rules: +- apiGroups: + - "" + - discovery.k8s.io + - networking.k8s.io + resources: + - endpoints + - endpointslices + - ingresses + - nodes + - nodes/proxy + - nodes/metrics + - pods + - services + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - pods + - pods/log + - namespaces + verbs: + - get + - list + - watch +- apiGroups: + - monitoring.grafana.com + resources: + - podlogs + verbs: + - get + - list + - watch +- apiGroups: + - monitoring.coreos.com + resources: + - prometheusrules + verbs: + - get + - list + - watch +- nonResourceURLs: + - /metrics + verbs: + - get +- apiGroups: + - monitoring.coreos.com + resources: + - podmonitors + - servicemonitors + - probes + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - events + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - configmaps + - secrets + verbs: + - get + - list + - watch +- apiGroups: + - apps + resources: + - replicasets + verbs: + - get + - list + - watch +- apiGroups: + - extensions + resources: + - replicasets + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: grafana-agent +subjects: +- kind: ServiceAccount + name: grafana-agent + namespace: monitoring-system +--- +apiVersion: v1 data: LOKI_COMPACTOR_HOST: loki-distributed-compactor.logging-system.svc.cluster.local LOKI_DISTRIBUTOR_HOST: loki-distributed-distributor.logging-system.svc.cluster.local @@ -182,9 +327,11 @@ data: config.river: 
"/*\nThe following example shows using the default all logs processing module, for\na single tenant and specifying the destination url/credentials via environment\nvariables.\n*/\nlogging {\n\tlevel = coalesce(env(\"AGENT_LOG_LEVEL\"), - \"info\")\n\tformat = \"logfmt\"\n}\n\nmodule.file \"lgtmp\" {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), - \"/etc/agent-modules\") + \"/lgtmp.river\"\n\n\targuments {\n\t\tcluster = - coalesce(env(\"CLUSTER\"), \"k3d-k3s-codelab\")\n\t\tlogs_endpoint = coalesce(env(\"LOGS_ENDPOINT\"), + \"info\")\n\tformat = \"logfmt\"\n}\n\n/********************************************\n + * Grafana LGTMP Stack Receiver Provider\n ********************************************/\nmodule.file + \"lgtmp\" {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), \"/etc/agent-modules\") + + \"/lgtmp.river\"\n\n\targuments {\n\t\tcluster = coalesce(env(\"CLUSTER\"), + \"k3d-k3s-codelab\")\n\t\tlogs_endpoint = coalesce(env(\"LOGS_ENDPOINT\"), \"http://nginx.gateway.svc:3100\")\n\t\tmetrics_endpoint = coalesce(env(\"METRICS_ENDPOINT\"), \"http://nginx.gateway.svc:8080\")\n\t\tprofiles_endpoint = coalesce(env(\"PROFILES_ENDPOINT\"), \"http://nginx.gateway.svc:4040\")\n\t\ttraces_endpoint = coalesce(env(\"TRACES_ENDPOINT\"), @@ -194,41 +341,636 @@ data: + \"/logs.river\"\n\n\targuments {\n\t\tforward_to = [module.file.lgtmp.exports.logs_receiver]\n\t\tgit_repo \ = \"https://github.com/qclaogui/agent-modules.git\"\n\t\tgit_rev = \"main\"\n\t\tgit_pull_freq = \"0s\"\n\t}\n}\n\n/********************************************\n - * Agent Integrations\n ********************************************/\nmodule.file - \"agent_integrations\" {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), - \"/etc/agent-modules\") + \"/integrations.river\"\n\n\targuments {\n\t\tname = - \"agent-integrations\"\n\t\tnamespace = \"monitoring-system\"\n\t\tforward_to - = [module.file.lgtmp.exports.metrics_receiver]\n\t}\n}\n" + * Metrics\n 
********************************************/\nmodule.file \"metrics_primary\" + {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), \"/etc/agent-modules\") + + \"/metrics.river\"\n\n\targuments {\n\t\tforward_to = [module.file.lgtmp.exports.metrics_receiver]\n\t\tclustering + = true\n\t}\n}\n\n/********************************************\n * Agent Integrations\n + ********************************************/\nmodule.file \"agent_integrations\" + {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), \"/etc/agent-modules\") + + \"/integrations.river\"\n\n\targuments {\n\t\tname = \"agent-integrations\"\n\t\tnamespace + \ = \"monitoring-system\"\n\t\tforward_to = [module.file.lgtmp.exports.metrics_receiver]\n\t}\n}\n" kind: ConfigMap metadata: - name: agent-config + name: agent-config-6thf5hghkg namespace: monitoring-system --- apiVersion: v1 data: - datasources.yaml: | - apiVersion: 1 + MEMCACHED_SECRET_NAME: integrations-memcached + MYSQL_SECRET_NAME: integrations-mysql + REDIS_SECRET_NAME: integrations-redis + memcached.river: "/*\nModule: Memcached integrations\nDescription: Wrapper module + to integration Memcached metrics\n*/\nargument \"clustering\" {\n\t// comment + = \"Whether or not clustering should be enabled\"\n\toptional = true\n\tdefault + \ = true\n}\n\nargument \"name\" {\n\t// comment = \"Name of the secret for Memcached\"\n\toptional + = true\n\tdefault = \"integrations-memcached\"\n}\n\nargument \"namespace\" {\n\t// + comment = \"Namespace of the Memcached secret Integrations\"\n\toptional = true\n\tdefault + \ = \"default\"\n}\n\nargument \"instance\" {\n\t// comment = \"Instance of the + Memcached\"\n\toptional = true\n\tdefault = \"primary\"\n}\n\nargument \"forward_to\" + { }\n\nremote.kubernetes.secret \"memcached\" {\n\tname = argument.name.value\n\tnamespace + = argument.namespace.value\n}\n\n// Metrics\nprometheus.exporter.memcached \"integrations_memcached\" + {\n\taddress = 
nonsensitive(remote.kubernetes.secret.memcached.data[\"memcached-address\"])\n\ttimeout + = \"5s\"\n}\n\nprometheus.scrape \"memcached\" {\n\tclustering {\n\t\tenabled + = argument.clustering.value\n\t}\n\n\tenable_protobuf_negotiation = true\n\tscrape_classic_histograms + \ = true\n\n\ttargets = concat(\n\t\tprometheus.exporter.memcached.integrations_memcached.targets,\n\t)\n\tjob_name + \ = \"integrations/memcached\"\n\tforward_to = [prometheus.relabel.integrations_memcached.receiver]\n}\n\nprometheus.relabel + \"integrations_memcached\" {\n\trule {\n\t\treplacement = argument.instance.value\n\t\ttarget_label + = \"instance\"\n\t}\n\tforward_to = argument.forward_to.value\n}\n" + mysql.river: "/*\nModule: Mysql integrations\nDescription: Wrapper module to integration + mysql metrics\n*/\nargument \"clustering\" {\n\t// comment = \"Whether or not + clustering should be enabled\"\n\toptional = true\n\tdefault = true\n}\n\nargument + \"name\" {\n\t// comment = \"Name of the secret for MySQL\"\n\toptional = true\n\tdefault + \ = \"integrations-mysql\"\n}\n\nargument \"namespace\" {\n\t// comment = \"Namespace + of the MySQL secret Integrations\"\n\toptional = true\n\tdefault = \"default\"\n}\n\nargument + \"instance\" {\n\t// comment = \"Instance of the Database\"\n\toptional = true\n\tdefault + \ = \"primary\"\n}\n\nargument \"forward_to\" { }\n\nremote.kubernetes.secret + \"mysql\" {\n\tname = argument.name.value\n\tnamespace = argument.namespace.value\n}\n\n// + Metrics\nprometheus.exporter.mysql \"integrations_mysql\" {\n\tdata_source_name + = nonsensitive(remote.kubernetes.secret.mysql.data[\"mysql-username\"]) + \":\" + + nonsensitive(remote.kubernetes.secret.mysql.data[\"mysql-password\"]) + \"@(\" + + nonsensitive(remote.kubernetes.secret.mysql.data[\"mysql-host\"]) + \")/\"\n}\n\nprometheus.scrape + \"mysql\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\tenable_protobuf_negotiation + = true\n\tscrape_classic_histograms = 
true\n\n\ttargets = concat(\n\t\tprometheus.exporter.mysql.integrations_mysql.targets,\n\t)\n\tjob_name + \ = \"integrations/mysql\"\n\tforward_to = [prometheus.relabel.integrations_mysql.receiver]\n}\n\nprometheus.relabel + \"integrations_mysql\" {\n\trule {\n\t\treplacement = argument.instance.value\n\t\ttarget_label + = \"instance\"\n\t}\n\tforward_to = argument.forward_to.value\n}\n" + redis.river: "/*\nModule: Redis integrations\nDescription: Wrapper module to integration + Redis metrics\n*/\nargument \"clustering\" {\n\t// comment = \"Whether or not + clustering should be enabled\"\n\toptional = true\n\tdefault = true\n}\n\nargument + \"namespace\" {\n\t// comment = \"Namespace of the Redis secret Integrations\"\n\toptional + = true\n\tdefault = \"default\"\n}\n\nargument \"name\" {\n\t// comment = \"Name + of the secret for Redis\"\n\toptional = true\n\tdefault = \"integrations-redis\"\n}\n\nargument + \"instance\" {\n\t// comment = \"Instance of the Redis\"\n\toptional = true\n\tdefault + \ = \"master\"\n}\n\nargument \"forward_to\" { }\n\nremote.kubernetes.secret \"redis\" + {\n\tname = argument.name.value\n\tnamespace = argument.namespace.value\n}\n\n// + Metrics\nprometheus.exporter.redis \"integrations_redis\" {\n\tredis_addr = + nonsensitive(remote.kubernetes.secret.redis.data[\"redis-addr\"])\n\tredis_password + = nonsensitive(remote.kubernetes.secret.redis.data[\"redis-password\"])\n}\n\nprometheus.scrape + \"redis\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\tenable_protobuf_negotiation + = true\n\tscrape_classic_histograms = true\n\n\ttargets = concat(\n\t\tprometheus.exporter.redis.integrations_redis.targets,\n\t)\n\tjob_name + \ = \"integrations/redis\"\n\tforward_to = [prometheus.relabel.integrations_redis.receiver]\n}\n\nprometheus.relabel + \"integrations_redis\" {\n\trule {\n\t\treplacement = argument.instance.value\n\t\ttarget_label + = \"instance\"\n\t}\n\tforward_to = argument.forward_to.value\n}\n" +kind: 
ConfigMap +metadata: + name: agent-integrations + namespace: monitoring-system +--- +apiVersion: v1 +data: + integrations.river: "/*\nModule: Agent integrations\nDescription: Wrapper module + to include auto loading integrations\n*/\nargument \"name\" {\n\t// comment = + \"Name of the integrations config\"\n\toptional = true\n\tdefault = \"agent-integrations\"\n}\n\nargument + \"namespace\" {\n\t// comment = \"Namespace of the integrations config\"\n\toptional + = true\n\tdefault = \"default\"\n}\n\nargument \"forward_to\" { }\n\nremote.kubernetes.configmap + \"integrations\" {\n\tname = argument.name.value\n\tnamespace = argument.namespace.value\n}\n\n/********************************************\n + * Integrations Mysql\n ********************************************/\nmodule.string + \"mysql\" {\n\tcontent = remote.kubernetes.configmap.integrations.data[\"mysql.river\"]\n\n\targuments + {\n\t\tnamespace = argument.namespace.value\n\t\tname = remote.kubernetes.configmap.integrations.data[\"MYSQL_SECRET_NAME\"]\n\t\tinstance + \ = \"primary\"\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n\n/********************************************\n + * Integrations Memcached\n ********************************************/\nmodule.string + \"memcached\" {\n\tcontent = remote.kubernetes.configmap.integrations.data[\"memcached.river\"]\n\n\targuments + {\n\t\tnamespace = argument.namespace.value\n\t\tname = remote.kubernetes.configmap.integrations.data[\"MEMCACHED_SECRET_NAME\"]\n\t\tinstance + \ = \"primary\"\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n\n/********************************************\n + * Integrations Redis\n ********************************************/\nmodule.string + \"redis\" {\n\tcontent = remote.kubernetes.configmap.integrations.data[\"redis.river\"]\n\n\targuments + {\n\t\tnamespace = argument.namespace.value\n\t\tname = remote.kubernetes.configmap.integrations.data[\"REDIS_SECRET_NAME\"]\n\t\tinstance + \ = \"master\"\n\t\tforward_to = 
argument.forward_to.value\n\t}\n}\n" + lgtmp.river: "/********************************************\n * ARGUMENTS\n ********************************************/\nargument + \"cluster\" {\n\toptional = true\n\tdefault = \"k3d-k3s-codelab\"\n}\n\nargument + \"tenant\" {\n\toptional = true\n\tdefault = \"anonymous\"\n}\n\nargument \"metrics_endpoint\" + {\n\toptional = true\n\tdefault = \"http://mimir:8080\"\n\t//comment = \"Where + to send collected metrics.\"\n}\n\nargument \"logs_endpoint\" {\n\toptional = + true\n\tdefault = \"http://loki:3100\"\n\t//comment = \"Where to send collected + logs.\"\n}\n\nargument \"traces_endpoint\" {\n\toptional = true\n\tdefault = + \"tempo:4317\"\n\t//comment = \"Where to send collected traces.\"\n}\n\nargument + \"profiles_endpoint\" {\n\toptional = true\n\tdefault = \"http://pyroscope:4040\"\n\t//comment + \ = \"Where to send collected profiles.\"\n}\n\n/********************************************\n + * EXPORTS\n ********************************************/\n\nexport \"metrics_receiver\" + {\n\tvalue = prometheus.remote_write.mimir.receiver\n}\n\nexport \"logs_receiver\" + {\n\tvalue = loki.write.loki.receiver\n}\n\nexport \"traces_receiver\" {\n\tvalue + = otelcol.exporter.otlp.tempo.input\n}\n\nexport \"profiles_receiver\" {\n\tvalue + = pyroscope.write.pyroscope.receiver\n}\n\n/********************************************\n + * Endpoints\n ********************************************/\n\n// Metrics\nprometheus.remote_write + \"mimir\" {\n\tendpoint {\n\t\turl = argument.metrics_endpoint.value + + \"/api/v1/push\"\n\t\tsend_native_histograms = true\n\t}\n\n\texternal_labels + = {\n\t\t\"scraped_by\" = \"grafana-agent\",\n\t\t\"cluster\" = argument.cluster.value,\n\t}\n}\n\n// + Logs\nloki.write \"loki\" {\n\tendpoint {\n\t\turl = argument.logs_endpoint.value + + \"/loki/api/v1/push\"\n\t\ttenant_id = argument.tenant.value\n\t}\n\n\texternal_labels + = {\n\t\t\"scraped_by\" = \"grafana-agent\",\n\t\t\"cluster\" = 
argument.cluster.value,\n\t}\n}\n\n// + Traces\notelcol.exporter.otlp \"tempo\" {\n\tclient {\n\t\tendpoint = argument.traces_endpoint.value\n\n\t\ttls + {\n\t\t\tinsecure = true\n\t\t\tinsecure_skip_verify = true\n\t\t}\n\t}\n}\n\n// + Profiles\npyroscope.write \"pyroscope\" {\n\tendpoint {\n\t\turl = argument.profiles_endpoint.value\n\t}\n\n\texternal_labels + = {\n\t\t\"scraped_by\" = \"grafana-agent\",\n\t\t\"cluster\" = argument.cluster.value,\n\t}\n}\n" + logs.river: "/*\nModule: logs\nDescription: Wrapper module to include all kubernetes + logging modules and use cri parsing\n*/\nargument \"forward_to\" {\n\t// comment + = \"Must be a list(LogsReceiver) where collected logs should be forwarded to\"\n\toptional + = false\n}\n\nargument \"tenant\" {\n\t// comment = \"The tenant to filter logs + to. This does not have to be the tenantId, this is the value to look for in the + logs.agent.grafana.com/tenant annotation, and this can be a regex.\"\n\toptional + = true\n\tdefault = \".*\"\n}\n\nargument \"keep_labels\" {\n\t// comment = \"List + of labels to keep before the log message is written to Loki\"\n\toptional = true\n\tdefault + \ = [\n\t\t\"app\",\n\t\t\"cluster\",\n\t\t\"component\",\n\t\t\"container\",\n\t\t\"deployment\",\n\t\t\"env\",\n\t\t\"filename\",\n\t\t\"instance\",\n\t\t\"job\",\n\t\t\"level\",\n\t\t\"log_type\",\n\t\t\"namespace\",\n\t\t\"region\",\n\t\t\"service\",\n\t\t\"squad\",\n\t\t\"team\",\n\t]\n}\n\nargument + \"git_repo\" {\n\toptional = true\n\tdefault = coalesce(env(\"GIT_REPO\"), \"https://github.com/grafana/agent-modules.git\")\n}\n\nargument + \"git_rev\" {\n\toptional = true\n\tdefault = coalesce(env(\"GIT_REV\"), env(\"GIT_REVISION\"), + env(\"GIT_BRANCH\"), \"main\")\n}\n\nargument \"git_pull_freq\" {\n\t// comment + = \"How often to pull the git repo, the default is 0s which means never pull\"\n\toptional + = true\n\tdefault = \"0s\"\n}\n\nmodule.git \"log_targets\" {\n\trepository = + argument.git_repo.value\n\trevision = 
argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/targets/logs-from-worker.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.log_formats_all.exports.process.receiver]\n\t\ttenant + \ = argument.tenant.value\n\t\tgit_repo = argument.git_repo.value\n\t\tgit_rev + \ = argument.git_rev.value\n\t\tgit_pull_freq = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git + \"log_formats_all\" {\n\trepository = argument.git_repo.value\n\trevision + \ = argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/log-formats/all.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.log_level_default.exports.process.receiver]\n\t\tgit_repo + \ = argument.git_repo.value\n\t\tgit_rev = argument.git_rev.value\n\t\tgit_pull_freq + = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git \"log_level_default\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/labels/log-level.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.label_normalize_filename.exports.process.receiver]\n\t}\n}\n\nmodule.git + \"label_normalize_filename\" {\n\trepository = argument.git_repo.value\n\trevision + \ = argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/labels/normalize-filename.river\"\n\n\targuments + {\n\t\t// here we fork, one branch goes to the log level module, the other goes + to the metrics module\n\t\t// this is because we need to reduce the labels on + the pre-metrics but they are still necessary in\n\t\t// downstream modules\n\t\tforward_to + = [\n\t\t\tmodule.git.pre_process_metrics.exports.process.receiver,\n\t\t\tmodule.git.drop_levels.exports.process.receiver,\n\t\t]\n\t}\n}\n\nmodule.git + \"pre_process_metrics\" {\n\trepository = argument.git_repo.value\n\trevision + \ = 
argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/metrics/pre-process-bytes-lines.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.drop_levels.exports.process.receiver]\n\t\tkeep_labels + = argument.keep_labels.value\n\t}\n}\n\nmodule.git \"drop_levels\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/drops/levels.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.scrub_all.exports.process.receiver]\n\t\tgit_repo + \ = argument.git_repo.value\n\t\tgit_rev = argument.git_rev.value\n\t\tgit_pull_freq + = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git \"scrub_all\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/scrubs/all.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.embed_pod.exports.process.receiver]\n\t\tgit_repo + \ = argument.git_repo.value\n\t\tgit_rev = argument.git_rev.value\n\t\tgit_pull_freq + = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git \"embed_pod\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/embed/pod.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.mask_all.exports.process.receiver]\n\t}\n}\n\nmodule.git + \"mask_all\" {\n\trepository = argument.git_repo.value\n\trevision = + argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/masks/all.river\"\n\n\targuments {\n\t\tforward_to + \ = [module.git.label_keep.exports.process.receiver]\n\t\tgit_repo = argument.git_repo.value\n\t\tgit_rev + \ = argument.git_rev.value\n\t\tgit_pull_freq = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git + \"label_keep\" {\n\trepository = 
argument.git_repo.value\n\trevision = + argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/labels/keep-labels.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.post_process_metrics.exports.process.receiver]\n\t\tkeep_labels + = argument.keep_labels.value\n\t}\n}\n\nmodule.git \"post_process_metrics\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/metrics/post-process-bytes-lines.river\"\n\n\targuments + {\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n" + metrics.river: "/*\nModule: metrics-all\nDescription: Wrapper module to include + all kubernetes metric modules and use cri parsing\n*/\nargument \"forward_to\" + {\n\t// comment = \"Must be a list(MetricssReceiver) where collected logs should + be forwarded to\"\n\toptional = false\n}\n\nargument \"clustering\" {\n\t// comment + = \"Whether or not clustering should be enabled\"\n\toptional = true\n\tdefault + \ = false\n}\n\n/********************************************\n * Kubernetes Auto + Scrape ServiceMonitor\n ********************************************/\nprometheus.operator.servicemonitors + \"auto_scrape_servicemonitors\" {\n\tforward_to = argument.forward_to.value\n\n\tclustering + {\n\t\tenabled = argument.clustering.value\n\t}\n}\n\n/********************************************\n + * Kubernetes Auto Scrape PodMonitors\n ********************************************/\nprometheus.operator.podmonitors + \"auto_scrape_podmonitors\" {\n\tforward_to = argument.forward_to.value\n\n\tclustering + {\n\t\tenabled = argument.clustering.value\n\t}\n\n\tselector {\n\t\tmatch_expression + {\n\t\t\tkey = \"team\"\n\t\t\toperator = \"In\"\n\t\t\tvalues = [\"team-infra\"]\n\t\t}\n\t}\n}\n\n/********************************************\n + * Kubernetes Prometheus Rules To Mimir\n 
********************************************/\nmimir.rules.kubernetes + \"prometheus_rules_to_mimir\" {\n\taddress = coalesce(env(\"METRICS_ENDPOINT\"), + \"http://nginx.gateway.svc:8080\")\n\ttenant_id = \"anonymous\"\n}\n" + profiles.river: "/*\nModule: profiles\nDescription: Wrapper module to include all + kubernetes profile modules and use cri parsing\n*/\nargument \"forward_to\" {\n\t// + comment = \"Must be a list(ProfilessReceiver) where collected logs should be forwarded + to\"\n\toptional = false\n}\n\nargument \"clustering\" {\n\t// comment = \"Whether + or not clustering should be enabled\"\n\toptional = true\n\tdefault = false\n}\n\ndiscovery.kubernetes + \"pyroscope_kubernetes\" {\n\trole = \"pod\"\n}\n\n// The default scrape config + allows to define annotations based scraping.\n//\n// For example the following + annotations:\n//\n// ```\n// profiles.grafana.com/memory.scrape: \"true\"\n// + profiles.grafana.com/memory.port: \"8080\"\n// profiles.grafana.com/cpu.scrape: + \"true\"\n// profiles.grafana.com/cpu.port: \"8080\"\n// profiles.grafana.com/goroutine.scrape: + \"true\"\n// profiles.grafana.com/goroutine.port: \"8080\"\n// ```\n//\n// will + scrape the `memory`, `cpu` and `goroutine` profiles from the `8080` port of the + pod.\n//\n// For more information see https://grafana.com/docs/phlare/latest/operators-guide/deploy-kubernetes/#optional-scrape-your-own-workloads-profiles\ndiscovery.relabel + \"kubernetes_pods\" {\n\ttargets = concat(discovery.kubernetes.pyroscope_kubernetes.targets)\n\n\trule + {\n\t\taction = \"drop\"\n\t\tsource_labels = [\"__meta_kubernetes_pod_phase\"]\n\t\tregex + \ = \"Pending|Succeeded|Failed|Completed\"\n\t}\n\n\trule {\n\t\taction + = \"labelmap\"\n\t\tregex = \"__meta_kubernetes_pod_label_(.+)\"\n\t}\n\n\trule + {\n\t\taction = \"replace\"\n\t\tsource_labels = [\"__meta_kubernetes_namespace\"]\n\t\ttarget_label + \ = \"namespace\"\n\t}\n\n\trule {\n\t\taction = \"replace\"\n\t\tsource_labels + = 
[\"__meta_kubernetes_pod_name\"]\n\t\ttarget_label = \"pod\"\n\t}\n\n\trule + {\n\t\taction = \"replace\"\n\t\tsource_labels = [\"__meta_kubernetes_pod_container_name\"]\n\t\ttarget_label + \ = \"container\"\n\t}\n}\n\ndiscovery.relabel \"kubernetes_pods_memory_default_name\" + {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_memory_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = 
\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Memory\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_memory\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_memory_default_name.output, discovery.relabel.kubernetes_pods_memory_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = true\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_cpu_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = 
[\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_cpu_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule 
{\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape CPU\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_cpu\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_cpu_default_name.output, discovery.relabel.kubernetes_pods_cpu_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_goroutine_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = 
\"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_goroutine_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Goroutine\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_goroutine\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = 
concat(discovery.relabel.kubernetes_pods_goroutine_default_name.output, + discovery.relabel.kubernetes_pods_goroutine_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_block_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_block_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scrape\"]\n\t\taction + \ = 
\"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Block\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_block\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_block_default_name.output, discovery.relabel.kubernetes_pods_block_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + 
\"kubernetes_pods_mutex_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_mutex_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = 
\"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Mutex\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_mutex\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_mutex_default_name.output, discovery.relabel.kubernetes_pods_mutex_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_fgprof_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = 
\"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_fgprof_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = 
\"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Fgprof\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_fgprof\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_fgprof_default_name.output, discovery.relabel.kubernetes_pods_fgprof_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = true\n\t\t}\n\t}\n}\n" + traces.river: "/*\nModule: traces\n*/\n\n/********************************************\n + * ARGUMENTS\n ********************************************/\nargument \"traces_forward_to\" + {\n\toptional = false\n}\n\nargument \"logs_forward_to\" {\n\toptional = false\n}\n\nargument + \"metrics_forward_to\" {\n\toptional = false\n}\n\nargument \"cluster\" {\n\toptional + = true\n\tdefault = \"k3d-k3s-codelab\"\n}\n\nargument \"otlp_http_endpoint\" + {\n\toptional = true\n\tdefault = \"0.0.0.0:4318\"\n}\n\nargument \"otlp_grpc_endpoint\" + {\n\toptional = true\n\tdefault = \"0.0.0.0:4317\"\n}\n\n/********************************************\n + * EXPORTS\n ********************************************/\nexport \"agent_traces_input\" + {\n\tvalue = otelcol.processor.batch.default.input\n}\n\n/********************************************\n + * Jaeger for Metrics Logs Traces\n ********************************************/\n\notelcol.receiver.jaeger + \"default\" {\n\tprotocols {\n\t\tgrpc {\n\t\t\tendpoint = \"0.0.0.0:14250\"\n\t\t}\n\n\t\tthrift_http + {\n\t\t\tendpoint = \"0.0.0.0:14268\"\n\t\t}\n\n\t\tthrift_binary {\n\t\t\tendpoint + = 
\"0.0.0.0:6832\"\n\t\t}\n\n\t\tthrift_compact {\n\t\t\tendpoint = \"0.0.0.0:6831\"\n\t\t}\n\t}\n\n\toutput + {\n\t\tmetrics = [otelcol.processor.batch.default.input]\n\t\tlogs = [otelcol.processor.resourcedetection.default.input]\n\t\ttraces + \ = [otelcol.processor.resourcedetection.default.input]\n\t}\n}\n\n/********************************************\n + * Otelcol for Metrics Logs Traces\n ********************************************/\n// + https://grafana.com/docs/agent/latest/flow/reference/components/otelcol.receiver.otlp/\notelcol.receiver.otlp + \"default\" {\n\tgrpc {\n\t\tendpoint = argument.otlp_grpc_endpoint.value\n\t}\n\n\thttp + {\n\t\tendpoint = argument.otlp_http_endpoint.value\n\t}\n\n\toutput {\n\t\tmetrics + = [otelcol.processor.batch.default.input]\n\t\tlogs = [otelcol.processor.resourcedetection.default.input]\n\t\ttraces + \ = [\n\t\t\totelcol.processor.resourcedetection.default.input,\n\t\t\totelcol.connector.spanlogs.autologging.input,\n\t\t]\n\t}\n}\n\notelcol.processor.resourcedetection + \"default\" {\n\tdetectors = [\"env\"]\n\n\toutput {\n\t\tlogs = [otelcol.processor.k8sattributes.default.input]\n\t\ttraces + = [otelcol.processor.k8sattributes.default.input]\n\t}\n}\n\notelcol.processor.k8sattributes + \"default\" {\n\textract {\n\t\tmetadata = [\n\t\t\t\"k8s.namespace.name\",\n\t\t\t\"k8s.pod.name\",\n\t\t\t\"k8s.deployment.name\",\n\t\t\t\"k8s.statefulset.name\",\n\t\t\t\"k8s.daemonset.name\",\n\t\t\t\"k8s.cronjob.name\",\n\t\t\t\"k8s.job.name\",\n\t\t\t\"k8s.node.name\",\n\t\t\t\"k8s.pod.uid\",\n\t\t\t\"k8s.pod.start_time\",\n\t\t]\n\t}\n\n\tpod_association + {\n\t\tsource {\n\t\t\tfrom = \"connection\"\n\t\t}\n\t}\n\n\toutput {\n\t\tlogs + \ = [otelcol.processor.transform.add_resource_attributes.input]\n\t\ttraces = + [otelcol.processor.transform.add_resource_attributes.input]\n\t}\n}\n\notelcol.processor.transform + \"add_resource_attributes\" {\n\terror_mode = \"ignore\"\n\n\tlog_statements {\n\t\tcontext + \ = 
\"resource\"\n\t\tstatements = [\n\t\t\t`set(attributes[\"pod\"], attributes[\"k8s.pod.name\"])`,\n\t\t\t`set(attributes[\"namespace\"], + attributes[\"k8s.namespace.name\"])`,\n\t\t\t`set(attributes[\"loki.resource.labels\"], + \"pod, namespace, cluster, job\")`,\n\t\t\t`set(attributes[\"k8s.cluster.name\"], + \"k3d-k3s-codelab\") where attributes[\"k8s.cluster.name\"] == nil`,\n\t\t]\n\t}\n\n\ttrace_statements + {\n\t\tcontext = \"resource\"\n\t\tstatements = [\n\t\t\t`set(attributes[\"k8s.cluster.name\"], + \"k3d-k3s-codelab\") where attributes[\"k8s.cluster.name\"] == nil`,\n\t\t]\n\t}\n\n\toutput + {\n\t\tlogs = [otelcol.processor.filter.default.input]\n\t\ttraces = [otelcol.processor.filter.default.input]\n\t}\n}\n\notelcol.processor.filter + \"default\" {\n\terror_mode = \"ignore\"\n\n\toutput {\n\t\tlogs = [otelcol.processor.batch.default.input]\n\t\ttraces + = [otelcol.processor.batch.default.input]\n\t}\n}\n\notelcol.processor.batch \"default\" + {\n\tsend_batch_size = 16384\n\tsend_batch_max_size = 0\n\ttimeout = + \"5s\"\n\n\toutput {\n\t\tmetrics = [otelcol.processor.memory_limiter.default.input]\n\t\tlogs + \ = [otelcol.processor.memory_limiter.default.input]\n\t\ttraces = [otelcol.processor.memory_limiter.default.input]\n\t}\n}\n\notelcol.processor.memory_limiter + \"default\" {\n\tcheck_interval = \"1s\"\n\tlimit_percentage = 50\n\tspike_limit_percentage + = 30\n\n\toutput {\n\t\tmetrics = [otelcol.exporter.prometheus.tracesmetrics.input]\n\t\tlogs + \ = [otelcol.exporter.loki.traceslogs.input]\n\t\ttraces = argument.traces_forward_to.value\n\t}\n}\n\notelcol.exporter.prometheus + \"tracesmetrics\" {\n\tforward_to = argument.metrics_forward_to.value\n}\n\notelcol.exporter.loki + \"traceslogs\" {\n\tforward_to = [loki.process.traceslogs.receiver]\n}\n\n// The + OpenTelemetry spanlog connector processes incoming trace spans and extracts data + from them ready\n// for logging.\notelcol.connector.spanlogs \"autologging\" {\n\t// + We only want to output 
a line for each root span (ie. every single trace), and + not for every\n\t// process or span (outputting a line for every span would be + extremely verbose).\n\tspans = false\n\troots = true\n\tprocesses = false\n\n\t// + We want to ensure that the following three span attributes are included in the + log line, if present.\n\tspan_attributes = [\n\t\t\"http.method\",\n\t\t\"http.target\",\n\t\t\"http.status_code\",\n\t]\n\n\t// + Overrides the default key in the log line to be `traceId`, which is then used + by Grafana to\n\t// identify the trace ID for correlation with the Tempo datasource.\n\toverrides + {\n\t\ttrace_id_key = \"traceId\"\n\t}\n\n\t// Send to the OpenTelemetry Loki + exporter.\n\toutput {\n\t\tlogs = [otelcol.exporter.loki.autologging.input]\n\t}\n}\n\n// + Simply forwards the incoming OpenTelemetry log format out as a Loki log.\n// We + need this stage to ensure we can then process the logline as a Loki object.\notelcol.exporter.loki + \"autologging\" {\n\tforward_to = [loki.process.autologging.receiver]\n}\n\n// + The Loki processor allows us to accept a correctly formatted Loki log and mutate + it into\n// a set of fields for output.\nloki.process \"autologging\" {\n\t// + The JSON stage simply extracts the `body` (the actual logline) from the Loki log, + ignoring\n\t// all other fields.\n\tstage.json {\n\t\texpressions = {\"body\" + = \"\"}\n\t}\n\t// The output stage takes the body (the main logline) and uses + this as the source for the output\n\t// logline. 
In this case, it essentially + turns it into logfmt.\n\tstage.output {\n\t\tsource = \"body\"\n\t}\n\n\tforward_to + = [loki.process.traceslogs.receiver]\n}\n\nloki.process \"traceslogs\" {\n\tstage.tenant + {\n\t\tvalue = \"anonymous\"\n\t}\n\n\tforward_to = argument.logs_forward_to.value\n}\n" +kind: ConfigMap +metadata: + name: agent-modules-cf8t5bf7t9 + namespace: monitoring-system +--- +apiVersion: v1 +data: + alertmanager_fallback_config.yaml: | + route: + group_wait: 0s + receiver: empty-receiver + + receivers: + # In this example we're not going to send any notification out of Alertmanager. + - name: 'empty-receiver' + mimir.yaml: | + # Do not use this configuration in production. + # It is for demonstration purposes only. + multitenancy_enabled: false + + # -usage-stats.enabled=false + usage_stats: + enabled: false + + server: + http_listen_port: 8080 + grpc_listen_port: 9095 + log_level: info + + # https://grafana.com/docs/mimir/latest/references/configuration-parameters/#use-environment-variables-in-the-configuration + common: + storage: + backend: s3 + s3: + endpoint: ${MIMIR_S3_ENDPOINT:minio.minio-system.svc:443} + access_key_id: ${MIMIR_S3_ACCESS_KEY_ID:lgtmp} + secret_access_key: ${MIMIR_S3_SECRET_ACCESS_KEY:supersecret} + insecure: ${MIMIR_S3_INSECURE:false} + http: + insecure_skip_verify: true + + alertmanager: + data_dir: /data/alertmanager + enable_api: true + external_url: /alertmanager + fallback_config_file: /etc/mimir/alertmanager_fallback_config.yaml + alertmanager_storage: + s3: + bucket_name: mimir-alertmanager + + + memberlist: + join_members: [ mimir-memberlist:7946 ] + + ingester: + ring: + replication_factor: 1 + + store_gateway: + sharding_ring: + replication_factor: 1 + + + blocks_storage: + s3: + bucket_name: mimir-blocks + tsdb: + dir: /data/ingester + ship_interval: 1m + block_ranges_period: [ 2h ] + retention_period: 3h + bucket_store: + index_cache: + backend: memcached + memcached: + addresses: 
dns+memcached.memcached-system.svc:11211 + + chunks_cache: + backend: memcached + memcached: + addresses: dns+memcached.memcached-system.svc:11211 + + metadata_cache: + backend: memcached + memcached: + addresses: dns+memcached.memcached-system.svc:11211 + + ruler: + rule_path: /data/rules + enable_api: true + alertmanager_url: http://localhost:8080/alertmanager + ruler_storage: + s3: + bucket_name: mimir-ruler + cache: + backend: memcached + memcached: + addresses: dns+memcached.memcached-system.svc:11211 + + compactor: + compaction_interval: 30s + data_dir: /data/mimir-compactor + cleanup_interval: 1m + tenant_cleanup_delay: 1m - deleteDatasources: - - name: Logs - uid: logs + limits: + native_histograms_ingestion_enabled: true - datasources: - # Loki for logs - - name: Logs - type: loki - uid: logs - access: proxy - url: http://nginx.gateway.svc.cluster.local:3100 - basicAuth: false - isDefault: true - version: 1 - editable: true + overrides_exporter: + ring: + enabled: true + wait_stability_min_duration: 30s + + runtime_config: + file: /etc/mimir/runtime.yaml + runtime.yaml: |- + # This file can be used to set overrides or other runtime config. 
+ ingester_limits: # limits that each ingester replica enforces + max_ingestion_rate: 20000 + max_series: 1500000 + max_tenants: 1000 + max_inflight_push_requests: 30000 + + distributor_limits: # limits that each distributor replica enforces + max_ingestion_rate: 20000 + max_inflight_push_requests: 30000 + max_inflight_push_requests_bytes: 50000000 + + overrides: + anonymous: # limits for anonymous that the whole cluster enforces + # ingestion_tenant_shard_size: 9 + max_global_series_per_user: 1500000 + max_fetched_series_per_query: 100000 + native_histograms_ingestion_enabled: true + ruler_max_rules_per_rule_group: 50 kind: ConfigMap metadata: labels: - grafana_datasource: "1" - name: grafana-datasources-9tgbk45h65 + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/version: 2.11.0 + name: mimir-config-958c4gm5k9 namespace: monitoring-system --- apiVersion: v1 @@ -241,6 +983,51 @@ metadata: type: Opaque --- apiVersion: v1 +data: + memcached-address: bWVtY2FjaGVkLm1lbWNhY2hlZC1zeXN0ZW0uc3ZjLmNsdXN0ZXIubG9jYWw6MTEyMTE= +kind: Secret +metadata: + name: integrations-memcached + namespace: monitoring-system +type: Opaque +--- +apiVersion: v1 +data: + mysql-host: bXlzcWwubXlzcWwtc3lzdGVtLnN2Yy5jbHVzdGVyLmxvY2Fs + mysql-password: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= + mysql-username: bGd0bXA= +kind: Secret +metadata: + name: integrations-mysql + namespace: monitoring-system +type: Opaque +--- +apiVersion: v1 +data: + redis-addr: cmVkaXMtbWFzdGVyLnJlZGlzLXN5c3RlbS5zdmMuY2x1c3Rlci5sb2NhbDo2Mzc5 + redis-password: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= +kind: Secret +metadata: + name: integrations-redis + namespace: monitoring-system +type: Opaque +--- +apiVersion: v1 +data: + MIMIR_S3_SECRET_ACCESS_KEY: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= +kind: Secret +metadata: + labels: + app.kubernetes.io/component: mimir + 
app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/version: 2.11.0 + name: mimir-env-92ddctt858 + namespace: monitoring-system +type: Opaque +--- +apiVersion: v1 kind: Service metadata: labels: @@ -582,6 +1369,133 @@ spec: app.kubernetes.io/name: loki-distributed type: ClusterIP --- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +spec: + internalTrafficPolicy: Cluster + ports: + - name: http-metrics + port: 80 + protocol: TCP + targetPort: 80 + - name: grpc-otlp + port: 4317 + protocol: TCP + targetPort: 4317 + - name: http-otlp + port: 4318 + protocol: TCP + targetPort: 4318 + - name: zipkin + port: 9411 + protocol: TCP + targetPort: 9411 + - name: jaeger-compact + port: 6831 + protocol: UDP + targetPort: 6831 + selector: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + type: ClusterIP +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent-cluster + namespace: monitoring-system +spec: + clusterIP: None + ports: + - name: http + port: 80 + protocol: TCP + targetPort: 80 + - name: grpc-otlp + port: 4317 + protocol: TCP + targetPort: 4317 + - name: http-otlp + port: 4318 + protocol: TCP + targetPort: 4318 + - name: zipkin + port: 9411 + protocol: TCP + targetPort: 9411 + - name: jaeger-compact + port: 6831 + protocol: UDP + targetPort: 6831 + selector: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + type: ClusterIP +--- +apiVersion: v1 
+kind: Service +metadata: + labels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/version: 2.11.0 + name: mimir + namespace: monitoring-system +spec: + ports: + - name: http-metrics + port: 8080 + - name: grpc-distribut + port: 9095 + selector: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/name: mimir +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/version: 2.11.0 + prometheus.io/service-monitor: "false" + name: mimir-memberlist + namespace: monitoring-system +spec: + clusterIP: None + ports: + - appProtocol: tcp + name: tcp-gossip-ring + port: 7946 + protocol: TCP + targetPort: 7946 + publishNotReadyAddresses: true + selector: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/name: mimir + app.kubernetes.io/part-of: memberlist +--- apiVersion: apps/v1 kind: Deployment metadata: @@ -1161,6 +2075,92 @@ spec: name: data --- apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/part-of: memberlist + app.kubernetes.io/version: 2.11.0 + name: mimir + namespace: monitoring-system +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/name: mimir + app.kubernetes.io/part-of: memberlist + template: + metadata: + annotations: + logs.agent.grafana.com/scrape: "true" + logs.agent.grafana.com/scrub-level: info + profiles.grafana.com/cpu.port_name: 
http-metrics + profiles.grafana.com/cpu.scrape: "false" + profiles.grafana.com/goroutine.port_name: http-metrics + profiles.grafana.com/goroutine.scrape: "false" + profiles.grafana.com/memory.port_name: http-metrics + profiles.grafana.com/memory.scrape: "false" + pyroscope.io/service_name: mimir + labels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/name: mimir + app.kubernetes.io/part-of: memberlist + spec: + containers: + - args: + - -target=all + - -config.expand-env=true + - -config.file=/etc/mimir/mimir.yaml + - -memberlist.bind-addr=$(POD_IP) + env: + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + envFrom: + - secretRef: + name: mimir-env-92ddctt858 + image: docker.io/grafana/mimir:2.11.0 + imagePullPolicy: IfNotPresent + name: mimir + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc-distribut + - containerPort: 7946 + name: http-memberlist + readinessProbe: + httpGet: + path: /ready + port: http-metrics + resources: + limits: + cpu: 999m + memory: 1Gi + requests: + cpu: 10m + memory: 55Mi + volumeMounts: + - mountPath: /etc/mimir + name: config + - mountPath: /data + name: storage + terminationGracePeriodSeconds: 60 + volumes: + - configMap: + name: mimir-config-958c4gm5k9 + name: config + - emptyDir: {} + name: storage +--- +apiVersion: apps/v1 kind: StatefulSet metadata: labels: @@ -1539,3 +2539,214 @@ spec: app.kubernetes.io/component: query-scheduler app.kubernetes.io/instance: loki-distributed app.kubernetes.io/name: loki-distributed +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +spec: + minReadySeconds: 10 + selector: + matchLabels: + app.kubernetes.io/instance: 
grafana-agent + app.kubernetes.io/name: grafana-agent + template: + metadata: + annotations: + kubectl.kubernetes.io/default-container: grafana-agent + logs.agent.grafana.com/scrape: "true" + logs.agent.grafana.com/scrub-level: debug + profiles.grafana.com/cpu.port_name: http-metrics + profiles.grafana.com/cpu.scrape: "false" + profiles.grafana.com/goroutine.port_name: http-metrics + profiles.grafana.com/goroutine.scrape: "false" + profiles.grafana.com/memory.port_name: http-metrics + profiles.grafana.com/memory.scrape: "false" + pyroscope.io/service_name: grafana-agent + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + spec: + containers: + - args: + - run + - /etc/agent/config.river + - --storage.path=/tmp/agent + - --server.http.listen-addr=0.0.0.0:80 + - --server.http.ui-path-prefix=/ + - --disable-reporting + - --cluster.enabled=true + - --cluster.join-addresses=grafana-agent-cluster + env: + - name: AGENT_MODE + value: flow + - name: AGENT_DEPLOY_MODE + value: helm + - name: HOSTNAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + envFrom: + - secretRef: + name: agent-env + optional: true + image: docker.io/grafana/agent:v0.40.3 + imagePullPolicy: IfNotPresent + name: grafana-agent + ports: + - containerPort: 80 + name: http-metrics + - containerPort: 4317 + name: grpc-otlp + protocol: TCP + - containerPort: 4318 + name: http-otlp + protocol: TCP + - containerPort: 9411 + name: zipkin + protocol: TCP + - containerPort: 6831 + name: jaeger-compact + protocol: UDP + readinessProbe: + httpGet: + path: /-/ready + port: 80 + scheme: HTTP + initialDelaySeconds: 10 + timeoutSeconds: 1 + volumeMounts: + - mountPath: /etc/agent + name: config + - mountPath: /var/log + name: varlog + readOnly: true + - mountPath: /etc/agent-modules + name: agent-modules + - args: + - --volume-dir=/etc/agent + - --webhook-url=http://localhost:80/-/reload + image: ghcr.io/jimmidyson/configmap-reload:v0.12.0 + name: config-reloader + 
resources: + requests: + cpu: 1m + memory: 5Mi + volumeMounts: + - mountPath: /etc/agent + name: config + dnsPolicy: ClusterFirst + nodeSelector: + kubernetes.io/os: linux + serviceAccountName: grafana-agent + volumes: + - configMap: + name: agent-config-6thf5hghkg + name: config + - hostPath: + path: /var/log + name: varlog + - configMap: + name: agent-modules-cf8t5bf7t9 + name: agent-modules + updateStrategy: + rollingUpdate: + maxSurge: 0 + maxUnavailable: 2 + type: RollingUpdate +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +spec: + endpoints: + - honorLabels: true + port: http-metrics + scheme: http + selector: + matchLabels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/version: 2.11.0 + name: mimir + namespace: monitoring-system +spec: + endpoints: + - port: http-metrics + relabelings: + - replacement: monitoring-system/mimir + sourceLabels: + - job + targetLabel: job + scheme: http + namespaceSelector: + matchNames: + - monitoring-system + selector: + matchExpressions: + - key: prometheus.io/service-monitor + operator: NotIn + values: + - "false" + matchLabels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/name: mimir +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent 
+ app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +spec: + rules: + - host: grafana-agent.localhost + http: + paths: + - backend: + service: + name: grafana-agent + port: + number: 80 + path: / + pathType: Prefix + - host: agent.localhost + http: + paths: + - backend: + service: + name: grafana-agent + port: + number: 80 + path: / + pathType: Prefix diff --git a/kubernetes/microservices-mode/logs/kustomization.yaml b/kubernetes/microservices-mode/logs/kustomization.yaml index 205b22c4..ec6dee3e 100644 --- a/kubernetes/microservices-mode/logs/kustomization.yaml +++ b/kubernetes/microservices-mode/logs/kustomization.yaml @@ -8,8 +8,13 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: +- ../../common/grafana-agent - loki-distributed +# optional +- ../../monolithic-mode/metrics/mimir + + secretGenerator: - name: loki-distributed-env namespace: logging-system @@ -19,19 +24,10 @@ secretGenerator: configMapGenerator: - name: agent-config namespace: monitoring-system - options: - disableNameSuffixHash: true + behavior: replace files: - configs/config.river -- name: grafana-datasources - namespace: monitoring-system - options: - labels: - grafana_datasource: "1" - files: - - datasources.yaml=configs/grafana-datasources-loki.yaml - - name: loki-distributed namespace: logging-system behavior: replace diff --git a/kubernetes/microservices-mode/metrics/configs/config.river b/kubernetes/microservices-mode/metrics/configs/config.river index b6bfcd6d..b8070635 100644 --- a/kubernetes/microservices-mode/metrics/configs/config.river +++ b/kubernetes/microservices-mode/metrics/configs/config.river @@ -8,6 +8,9 @@ logging { format = "logfmt" } +/******************************************** + * Grafana LGTMP Stack Receiver Provider + ********************************************/ module.file "lgtmp" { filename = coalesce(env("AGENT_CONFIG_FOLDER"), "/etc/agent-modules") + 
"/lgtmp.river" diff --git a/kubernetes/microservices-mode/metrics/configs/grafana-datasources-mimir.yaml b/kubernetes/microservices-mode/metrics/configs/grafana-datasources-mimir.yaml deleted file mode 100644 index abd752fa..00000000 --- a/kubernetes/microservices-mode/metrics/configs/grafana-datasources-mimir.yaml +++ /dev/null @@ -1,30 +0,0 @@ -apiVersion: 1 - -deleteDatasources: -- name: Metrics - uid: metrics -- name: Logs - uid: logs - -datasources: -# Mimir for metrics -- name: Metrics - type: prometheus - uid: metrics - access: proxy - url: http://nginx.gateway.svc.cluster.local:8080/prometheus - basicAuth: false - isDefault: true - version: 1 - editable: true - -# Loki for logs -- name: Logs - type: loki - uid: logs - access: proxy - url: http://nginx.gateway.svc.cluster.local:3100 - basicAuth: false - isDefault: true - version: 1 - editable: true diff --git a/kubernetes/microservices-mode/metrics/k8s-all-in-one.yaml b/kubernetes/microservices-mode/metrics/k8s-all-in-one.yaml index 10f7184b..d5279943 100644 --- a/kubernetes/microservices-mode/metrics/k8s-all-in-one.yaml +++ b/kubernetes/microservices-mode/metrics/k8s-all-in-one.yaml @@ -1,5 +1,17 @@ apiVersion: v1 kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +--- +apiVersion: v1 +kind: ServiceAccount metadata: labels: app.kubernetes.io/instance: mimir-distributed @@ -10,6 +22,127 @@ metadata: name: mimir-distributed namespace: monitoring-system --- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent +rules: +- apiGroups: + 
- "" + - discovery.k8s.io + - networking.k8s.io + resources: + - endpoints + - endpointslices + - ingresses + - nodes + - nodes/proxy + - nodes/metrics + - pods + - services + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - pods + - pods/log + - namespaces + verbs: + - get + - list + - watch +- apiGroups: + - monitoring.grafana.com + resources: + - podlogs + verbs: + - get + - list + - watch +- apiGroups: + - monitoring.coreos.com + resources: + - prometheusrules + verbs: + - get + - list + - watch +- nonResourceURLs: + - /metrics + verbs: + - get +- apiGroups: + - monitoring.coreos.com + resources: + - podmonitors + - servicemonitors + - probes + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - events + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - configmaps + - secrets + verbs: + - get + - list + - watch +- apiGroups: + - apps + resources: + - replicasets + verbs: + - get + - list + - watch +- apiGroups: + - extensions + resources: + - replicasets + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: grafana-agent +subjects: +- kind: ServiceAccount + name: grafana-agent + namespace: monitoring-system +--- apiVersion: v1 data: MIMIR_ALERT_MANAGER_HOST: mimir-distributed-alertmanager-headless.monitoring-system.svc.cluster.local @@ -23,865 +156,15 @@ metadata: namespace: gateway --- apiVersion: v1 -data: - agent-cluster-node.json: |- - { - "annotations": { - "list": [ - { - "datasource": "$loki_datasource", - "enable": true, - "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | 
namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", - "iconColor": "rgba(0, 211, 255, 1)", - "instant": false, - "name": "Deployments", - "titleFormat": "{{cluster}}/{{namespace}}" - } - ] - }, - "graphTooltip": 1, - "links": [ - { - "icon": "doc", - "targetBlank": true, - "title": "Documentation", - "tooltip": "Clustering documentation", - "type": "link", - "url": "https://grafana.com/docs/agent/latest/flow/reference/cli/run/#clustered-mode-experimental" - }, - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "title": "Node Info", - "type": "row" - }, - { - "datasource": "${datasource}", - "description": "Information about a specific cluster node.\n\n* Lamport clock time: The observed Lamport time on the specific node's clock used to provide partial ordering around gossip messages. Nodes should ideally be observing roughly the same time, meaning they are up-to-date on the cluster state. If a node is falling behind, it means that it has not recently processed the same number of messages and may have an outdated view of its peers.\n\n* Internal cluster state observers: The number of Observer functions that are registered to run whenever the node detects a cluster change.\n\n* Gossip health score: A health score assigned to this node by the memberlist implementation. The lower, the better.\n\n* Gossip protocol version: The protocol version used by nodes to communicate with one another. 
It should match across all nodes.\n", - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 1 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(cluster_node_lamport_time{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "Lamport clock time" - }, - { - "datasource": "${datasource}", - "expr": "sum(cluster_node_update_observers{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "Internal cluster state observers" - }, - { - "datasource": "${datasource}", - "expr": "sum(cluster_node_gossip_health_score{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "Gossip health score" - }, - { - "datasource": "${datasource}", - "expr": "sum(cluster_node_gossip_proto_version{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "Gossip protocol version" - } - ], - "title": "Node Info", - "transformations": [ - { - "id": "renameByRegex", - "options": { - "regex": "Value #(.*)", - "renamePattern": "$1" - } - }, - { - "id": "reduce", - "options": { } - }, - { - "id": "organize", - "options": { - "excludeByName": { }, - "indexByName": { }, - "renameByName": { - "Field": "Metric", - "Max": "Value" - } - } - } - ], - "type": "table" - }, - { - "datasource": "${datasource}", - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 1 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(cluster_node_gossip_received_events_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", - "instant": false, - "legendFormat": "{{event}}", - "range": 
true - } - ], - "title": "Gossip ops/s", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Known peers to the node (including the local node).\n", - "fieldConfig": { - "defaults": { - "unit": "suffix:peers" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 9 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(cluster_node_peers{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", - "instant": false, - "legendFormat": "__auto", - "range": true - } - ], - "title": "Known peers", - "type": "stat" - }, - { - "datasource": "${datasource}", - "description": "Known peers to the node by state (including the local node).\n", - "fieldConfig": { - "defaults": { - "unit": "suffix:nodes" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 9 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "cluster_node_peers{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}", - "instant": false, - "legendFormat": "{{state}}", - "range": true - } - ], - "title": "Peers by state", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 17 - }, - "title": "Gossip Transport", - "type": "row" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "axisCenteredZero": true - }, - "unit": "Bps" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 18 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(cluster_transport_rx_bytes_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", - "instant": false, - "legendFormat": "rx", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "-1 * rate(cluster_transport_tx_bytes_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", - "instant": false, - "legendFormat": "tx", - "range": true - } - ], - 
"title": "Transport bandwidth", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "unit": "percentunit" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 18 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "1 - (\nrate(cluster_transport_tx_packets_failed_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) /\nrate(cluster_transport_tx_packets_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n)\n", - "instant": false, - "legendFormat": "Tx success %", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "1 - (\n rate(cluster_transport_rx_packets_failed_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) /\n rate(cluster_transport_rx_packets_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n )\n", - "instant": false, - "legendFormat": "Rx success %", - "range": true - } - ], - "title": "Packet write success rate", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "The number of packets enqueued currently to be decoded or encoded and sent during communication with other nodes.\n\nThe incoming and outgoing packet queue should be as empty as possible; a growing queue means that the Agent cannot keep up with the number of messages required to have all nodes informed of cluster changes, and the nodes may not converge in a timely fashion.\n", - "fieldConfig": { - "defaults": { - "unit": "pkts" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 18 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "cluster_transport_tx_packet_queue_length{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}", - "instant": false, - "legendFormat": "tx queue", - "range": true - }, - { - "datasource": "${datasource}", - "expr": 
"cluster_transport_rx_packet_queue_length{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}", - "instant": false, - "legendFormat": "rx queue", - "range": true - } - ], - "title": "Pending packet queue", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "axisCenteredZero": true - }, - "unit": "Bps" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 26 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(cluster_transport_stream_rx_bytes_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", - "instant": false, - "legendFormat": "rx", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "-1 * rate(cluster_transport_stream_tx_bytes_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", - "instant": false, - "legendFormat": "tx", - "range": true - } - ], - "title": "Stream bandwidth", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "unit": "percentunit" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 26 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "1 - (\n rate(cluster_transport_stream_tx_packets_failed_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) /\n rate(cluster_transport_stream_tx_packets_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n )\n", - "instant": false, - "legendFormat": "Tx success %", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "1 - (\n rate(cluster_transport_stream_rx_packets_failed_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) /\n rate(cluster_transport_stream_rx_packets_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n )\n", 
- "instant": false, - "legendFormat": "Rx success %", - "range": true - } - ], - "title": "Stream write success rate", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "The number of open connections from this node to its peers.\n\nEach node picks up a subset of its peers to continuously gossip messages around cluster status using streaming HTTP/2 connections. This panel can be used to detect networking failures that result in cluster communication being disrupted and convergence taking longer than expected or outright failing.\n", - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 26 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "cluster_transport_streams{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}", - "instant": false, - "legendFormat": "Open streams", - "range": true - } - ], - "title": "Open transport streams", - "type": "timeseries" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": "cluster", - "query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": "namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "instance", - "name": "instance", - "query": { - "query": 
"label_values(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n", - "refId": "instance" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - "timezone": "utc", - "title": "Grafana Agent Flow / Cluster Node", - "uid": "dd370cd333b2d9258435fb1b5a20a89b" - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin - labels: - grafana_dashboard: "1" - name: agent-cluster-node.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - agent-cluster-overview.json: |- - { - "annotations": { - "list": [ - { - "datasource": "$loki_datasource", - "enable": true, - "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", - "iconColor": "rgba(0, 211, 255, 1)", - "instant": false, - "name": "Deployments", - "titleFormat": "{{cluster}}/{{namespace}}" - } - ] - }, - "graphTooltip": 1, - "links": [ - { - "icon": "doc", - "targetBlank": true, - "title": "Documentation", - "tooltip": "Clustering documentation", - "type": "link", - "url": "https://grafana.com/docs/agent/latest/flow/reference/cli/run/#clustered-mode-experimental" - }, - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "${datasource}", - "gridPos": { - "h": 9, - "w": 8, - "x": 0, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "count(cluster_node_info{cluster=\"$cluster\", 
namespace=\"$namespace\"})", - "instant": true, - "legendFormat": "__auto", - "range": false - } - ], - "title": "Nodes", - "type": "stat" - }, - { - "datasource": "${datasource}", - "description": "Nodes info.\n", - "fieldConfig": { - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Dashboard" - }, - "properties": [ - { - "id": "mappings", - "value": [ - { - "options": { - "1": { - "index": 0, - "text": "Link" - } - }, - "type": "value" - } - ] - }, - { - "id": "links", - "value": [ - { - "targetBlank": false, - "title": "Detail dashboard for node", - "url": "/d/dd370cd333b2d9258435fb1b5a20a89b/grafana-agent-flow-cluster-node?var-instance=${__data.fields.instance}&var-datasource=${datasource}&var-loki_datasource=${loki_datasource}&var-cluster=${cluster}&var-namespace=${namespace}" - } - ] - } - ] - } - ] - }, - "gridPos": { - "h": 9, - "w": 16, - "x": 8, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"}", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false - } - ], - "title": "Node table", - "transformations": [ - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true, - "Value": false, - "__name__": true, - "cluster": true, - "namespace": true, - "state": false - }, - "indexByName": { }, - "renameByName": { - "Value": "Dashboard", - "instance": "", - "state": "" - } - } - } - ], - "type": "table" - }, - { - "datasource": "${datasource}", - "description": "Whether the cluster state has converged.\n\nIt is normal for the cluster state to be diverged briefly as gossip events propagate. 
It is not normal for the cluster state to be diverged for a long period of time.\n\nThis will show one of the following:\n\n* Converged: Nodes are aware of all other nodes, with the correct states.\n* Not converged: A subset of nodes aren't aware of their peers, or don't have an updated view of peer states.\n", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "1": { - "color": "red", - "index": 1, - "text": "Not converged" - } - }, - "type": "value" - }, - { - "options": { - "match": "null", - "result": { - "color": "green", - "index": 0, - "text": "Converged" - } - }, - "type": "special" - } - ], - "unit": "suffix:nodes" - } - }, - "gridPos": { - "h": 9, - "w": 8, - "x": 0, - "y": 9 - }, - "options": { - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "clamp((\n sum(stddev by (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"}) != 0) or\n (sum(abs(sum without (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"})) - scalar(count(cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"})) != 0))\n ),\n 1, 1\n)\n", - "format": "time_series", - "instant": true, - "legendFormat": "__auto", - "range": false - } - ], - "title": "Convergance state", - "type": "stat" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 80, - "spanNulls": true - }, - "mappings": [ - { - "options": { - "0": { - "color": "green", - "text": "Yes" - } - }, - "type": "value" - }, - { - "options": { - "1": { - "color": "red", - "text": "No" - } - }, - "type": "value" - } - ], - "max": 1, - "noValue": 0 - } - }, - "gridPos": { - "h": 9, - "w": 16, - "x": 8, - "y": 9 - }, - "options": { - "mergeValues": true - }, - "targets": [ - { - 
"datasource": "${datasource}", - "expr": "ceil(clamp((\n sum(stddev by (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"})) or\n (sum(abs(sum without (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"})) - scalar(count(cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"}))))\n ),\n 0, 1\n))\n", - "instant": false, - "legendFormat": "Converged", - "range": true - } - ], - "title": "Convergance state timeline", - "type": "state-timeline" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": "cluster", - "query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": "namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - "timezone": "utc", - "title": "Grafana Agent Flow / Cluster Overview", - "uid": "7e07f9c975fcfc2a6e120a95f579f843" - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin - labels: - 
grafana_dashboard: "1" - name: agent-cluster-overview.json - namespace: monitoring-system ---- -apiVersion: v1 data: config.river: "/*\nThe following example shows using the default all logs processing module, for\na single tenant and specifying the destination url/credentials via environment\nvariables.\n*/\nlogging {\n\tlevel = coalesce(env(\"AGENT_LOG_LEVEL\"), - \"info\")\n\tformat = \"logfmt\"\n}\n\nmodule.file \"lgtmp\" {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), - \"/etc/agent-modules\") + \"/lgtmp.river\"\n\n\targuments {\n\t\tcluster = - coalesce(env(\"CLUSTER\"), \"k3d-k3s-codelab\")\n\t\tlogs_endpoint = coalesce(env(\"LOGS_ENDPOINT\"), + \"info\")\n\tformat = \"logfmt\"\n}\n\n/********************************************\n + * Grafana LGTMP Stack Receiver Provider\n ********************************************/\nmodule.file + \"lgtmp\" {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), \"/etc/agent-modules\") + + \"/lgtmp.river\"\n\n\targuments {\n\t\tcluster = coalesce(env(\"CLUSTER\"), + \"k3d-k3s-codelab\")\n\t\tlogs_endpoint = coalesce(env(\"LOGS_ENDPOINT\"), \"http://nginx.gateway.svc:3100\")\n\t\tmetrics_endpoint = coalesce(env(\"METRICS_ENDPOINT\"), \"http://nginx.gateway.svc:8080\")\n\t\tprofiles_endpoint = coalesce(env(\"PROFILES_ENDPOINT\"), \"http://nginx.gateway.svc:4040\")\n\t\ttraces_endpoint = coalesce(env(\"TRACES_ENDPOINT\"), @@ -896,9603 +179,485 @@ data: \ = \"monitoring-system\"\n\t\tforward_to = [module.file.lgtmp.exports.metrics_receiver]\n\t}\n}\n" kind: ConfigMap metadata: - name: agent-config - namespace: monitoring-system ---- -apiVersion: v1 -data: - agent-flow-controller.json: |- - { - "annotations": { - "list": [ - { - "datasource": "$loki_datasource", - "enable": true, - "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", - "iconColor": "rgba(0, 211, 255, 1)", - "instant": false, - "name": "Deployments", - 
"titleFormat": "{{cluster}}/{{namespace}}" - } - ] - }, - "graphTooltip": 1, - "links": [ - { - "icon": "doc", - "targetBlank": true, - "title": "Documentation", - "tooltip": "Component controller documentation", - "type": "link", - "url": "https://grafana.com/docs/agent/latest/flow/concepts/component_controller/" - }, - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "${datasource}", - "description": "The number of Grafana Agent Flow instances whose metrics are being sent and reported.\n", - "fieldConfig": { - "defaults": { - "unit": "agents" - } - }, - "gridPos": { - "h": 4, - "w": 10, - "x": 0, - "y": 0 - }, - "options": { - "colorMode": "none", - "graphMode": "none" - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "count(agent_component_controller_evaluating{cluster=\"$cluster\", namespace=\"$namespace\"})", - "instant": false, - "legendFormat": "__auto", - "range": true - } - ], - "title": "Running agents", - "type": "stat" - }, - { - "datasource": "${datasource}", - "description": "The number of running components across all running agents.\n", - "fieldConfig": { - "defaults": { - "unit": "components" - } - }, - "gridPos": { - "h": 4, - "w": 10, - "x": 0, - "y": 4 - }, - "options": { - "colorMode": "none", - "graphMode": "none" - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"})", - "instant": false, - "legendFormat": "__auto", - "range": true - } - ], - "title": "Running components", - "type": "stat" - }, - { - "datasource": "${datasource}", - "description": "The percentage of components which are in a healthy state.\n", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "No components", - "unit": 
"percentunit" - } - }, - "gridPos": { - "h": 4, - "w": 10, - "x": 0, - "y": 8 - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "text": { - "valueSize": 80 - } - }, - "pluginVersion": "9.0.6", - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\",health_type=\"healthy\"}) /\nsum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"})\n", - "instant": false, - "legendFormat": "__auto", - "range": true - } - ], - "title": "Overall component health", - "type": "stat" - }, - { - "datasource": "${datasource}", - "description": "Breakdown of components by health across all running agents.\n\n* Healthy: components have been evaluated completely and are reporting themselves as healthy.\n* Unhealthy: Components either could not be evaluated or are reporting themselves as unhealthy.\n* Unknown: A component has been created but has not yet been started.\n* Exited: A component has exited. It will not return to the running state.\n\nMore information on a component's health state can be retrieved using\nthe Grafana Agent Flow UI.\n\nNote that components may be in a degraded state even if they report\nthemselves as healthy. 
Use component-specific dashboards and alerts\nto observe detailed information about the behavior of a component.\n", - "fieldConfig": { - "defaults": { - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Unhealthy" - }, - "properties": [ - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 1 - } - ] - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Unknown" - }, - "properties": [ - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "blue", - "value": 1 - } - ] - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Exited" - }, - "properties": [ - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 1 - } - ] - } - } - ] - } - ] - }, - "gridPos": { - "h": 12, - "w": 14, - "x": 10, - "y": 0 - }, - "options": { - "orientation": "vertical", - "showUnfilled": true - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"healthy\"}) or vector(0)", - "instant": true, - "legendFormat": "Healthy", - "range": false - }, - { - "datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"unhealthy\"}) or vector(0)", - "instant": true, - "legendFormat": "Unhealthy", - "range": false - }, - { - "datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"unknown\"}) or vector(0)", - "instant": true, - "legendFormat": "Unknown", - 
"range": false - }, - { - "datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"exited\"}) or vector(0)", - "instant": true, - "legendFormat": "Exited", - "range": false - } - ], - "title": "Components by health", - "type": "bargauge" - }, - { - "datasource": "${datasource}", - "description": "The frequency at which components get updated.\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "points", - "pointSize": 3 - }, - "unit": "ops" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 12 - }, - "options": { - "tooltip": { - "mode": "multi" - } - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (instance) (rate(agent_component_evaluation_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", - "instant": false, - "legendFormat": "__auto", - "range": true - } - ], - "title": "Component evaluation rate", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "The percentiles for how long it takes to complete component evaluations.\n\nComponent evaluations must complete for components to have the latest\narguments. 
The longer the evaluations take, the slower it will be to\nreconcile the state of components.\n\nIf evaluation is taking too long, consider sharding your components to\ndeal with smaller amounts of data and reuse data as much as possible.\n", - "fieldConfig": { - "defaults": { - "unit": "s" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "histogram_quantile(0.99, sum(rate(agent_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\nor\nhistogram_quantile(0.99, sum by (le) (rate(agent_component_evaluation_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\n", - "instant": false, - "legendFormat": "99th percentile", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "histogram_quantile(0.50, sum(rate(agent_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\nor\nhistogram_quantile(0.50, sum by (le) (rate(agent_component_evaluation_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\n", - "instant": false, - "legendFormat": "50th percentile", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "(\n histogram_sum(sum(rate(agent_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))) /\n histogram_count(sum(rate(agent_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\n)\nor\n(\n sum(rate(agent_component_evaluation_seconds_sum{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])) /\n sum(rate(agent_component_evaluation_seconds_count{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n)\n", - "instant": false, - "legendFormat": "Average", - "range": true - } - ], - "title": "Component evaluation time", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "The 
percentage of time spent evaluating 'slow' components - components that took longer than 1 minute to evaluate.\n\nIdeally, no component should take more than 1 minute to evaluate. The components displayed in this chart\nmay be a sign of a problem with the pipeline.\n", - "fieldConfig": { - "defaults": { - "unit": "percentunit" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (component_id) (rate(agent_component_evaluation_slow_seconds{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n/ scalar(sum(rate(agent_component_evaluation_seconds_sum{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))\n", - "instant": false, - "legendFormat": "{{component_id}}", - "range": true - } - ], - "title": "Slow components evaluation times", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Detailed histogram view of how long component evaluations take.\n\nThe goal is to design your config so that evaluations take as little\ntime as possible; under 100ms is a good goal.\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 22 - }, - "maxDataPoints": 30, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "scheme": "Spectral" - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 0.10000000000000001 - }, - "tooltip": { - "show": true, - "yHistogram": true - }, - "yAxis": { - "unit": "s" - } - }, - "pluginVersion": "9.0.6", - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(increase(agent_component_evaluation_seconds{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\nor ignoring (le)\nsum by (le) (increase(agent_component_evaluation_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n", - "format": "heatmap", - "instant": false, - "legendFormat": "{{le}}", - "range": true - } - ], - "title": "Component evaluation 
histogram", - "type": "heatmap" - }, - { - "datasource": "${datasource}", - "description": "Detailed histogram of how long components wait to be evaluated after their dependency is updated.\n\nThe goal is to design your config so that most of the time components do not\nqueue for long; under 10ms is a good goal.\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 22 - }, - "maxDataPoints": 30, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "scheme": "Spectral" - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 0.10000000000000001 - }, - "tooltip": { - "show": true, - "yHistogram": true - }, - "yAxis": { - "unit": "s" - } - }, - "pluginVersion": "9.0.6", - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(increase(agent_component_dependencies_wait_seconds{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\nor ignoring (le)\nsum by (le) (increase(agent_component_dependencies_wait_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n", - "format": "heatmap", - "instant": false, - "legendFormat": "{{le}}", - "range": true - } - ], - "title": "Component dependency wait histogram", - "type": "heatmap" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": "cluster", - "query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - 
"query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": "namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - "timezone": "utc", - "title": "Grafana Agent Flow / Controller", - "uid": "f861e5fef2e795edd5c4c73bee1ba769" - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin - labels: - grafana_dashboard: "1" - name: agent-flow-controller.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - agent-flow-opentelemetry.json: |- - { - "graphTooltip": 1, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "title": "Receivers for traces [otelcol.receiver]", - "type": "row" - }, - { - "datasource": "${datasource}", - "description": "Number of spans successfully pushed into the pipeline.\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(receiver_accepted_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{ pod }} / {{ transport }}", - "range": true - } - ], - "title": "Accepted spans", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Number of spans that could not be pushed into the pipeline.\n", - "fieldConfig": { - "defaults": { - 
"custom": { - "fillOpacity": 20, - "gradientMode": "hue", - "stacking": { - "mode": "normal" - } - } - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(receiver_refused_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{ pod }} / {{ transport }}", - "range": true - } - ], - "title": "Refused spans", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "The duration of inbound RPCs.\n", - "fieldConfig": { - "defaults": { - "unit": "milliseconds" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 0 - }, - "maxDataPoints": 30, - "options": { - "calculate": false, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "scale": "exponential", - "scheme": "Oranges", - "steps": 65 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1.0000000000000001e-09 - }, - "tooltip": { - "show": true, - "yHistogram": true - }, - "yAxis": { - "unit": "s" - } - }, - "pluginVersion": "9.0.6", - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (le) (increase(rpc_server_duration_milliseconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", rpc_service=\"opentelemetry.proto.collector.trace.v1.TraceService\"}[$__rate_interval]))", - "format": "heatmap", - "instant": false, - "legendFormat": "{{le}}", - "range": true - } - ], - "title": "RPC server duration (traces)", - "type": "heatmap" - }, - { - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 10 - }, - "title": "Batching [otelcol.processor.batch]", - "type": "row" - }, - { - "datasource": "${datasource}", - "description": "Number of units in the batch\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 10 - }, - "maxDataPoints": 30, - "options": { - "calculate": 
false, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "scale": "exponential", - "scheme": "Oranges", - "steps": 65 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1.0000000000000001e-09 - }, - "tooltip": { - "show": true, - "yHistogram": true - }, - "yAxis": { - "unit": "s" - } - }, - "pluginVersion": "9.0.6", - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (le) (increase(processor_batch_batch_send_size_ratio_bucket{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval]))", - "format": "heatmap", - "instant": false, - "legendFormat": "{{le}}", - "range": true - } - ], - "title": "Number of units in the batch", - "type": "heatmap" - }, - { - "datasource": "${datasource}", - "description": "Number of distinct metadata value combinations being processed\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 10 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "processor_batch_metadata_cardinality_ratio{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}\n", - "instant": false, - "legendFormat": "{{ pod }}", - "range": true - } - ], - "title": "Distinct metadata values", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Number of times the batch was sent due to a timeout trigger\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 10 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(processor_batch_timeout_trigger_send_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{ pod }}", - "range": true - } - ], - "title": "Timeout trigger", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 20 - }, - "title": "Exporters for traces [otelcol.exporter]", - "type": "row" - }, - { - 
"datasource": "${datasource}", - "description": "Number of spans successfully sent to destination.\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 20 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(exporter_sent_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{ pod }}", - "range": true - } - ], - "title": "Exported sent spans", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Number of spans in failed attempts to send to destination.\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 20 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(exporter_send_failed_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{ pod }}", - "range": true - } - ], - "title": "Exported failed spans", - "type": "timeseries" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": "cluster", - "query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": "namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "allValue": ".*", - 
"datasource": "${datasource}", - "includeAll": true, - "label": "instance", - "multi": true, - "name": "instance", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n", - "refId": "instance" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - "timezone": "utc", - "title": "Grafana Agent Flow / OpenTelemetry", - "uid": "c90e752eb8c0fce588f906b7279aceea" - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin - labels: - grafana_dashboard: "1" - name: agent-flow-opentelemetry.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - agent-flow-prometheus-remote-write.json: |- - { - "annotations": { - "list": [ - { - "datasource": "$loki_datasource", - "enable": true, - "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", - "iconColor": "rgba(0, 211, 255, 1)", - "instant": false, - "name": "Deployments", - "titleFormat": "{{cluster}}/{{namespace}}" - } - ] - }, - "graphTooltip": 1, - "links": [ - { - "icon": "doc", - "targetBlank": true, - "title": "Documentation", - "tooltip": "Component documentation", - "type": "link", - "url": "https://grafana.com/docs/agent/latest/flow/reference/components/prometheus.remote_write/" - }, - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "collapsed": false, - "datasource": "${datasource}", - 
"gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "title": "prometheus.scrape", - "type": "row" - }, - { - "datasource": "${datasource}", - "description": "Percentage of targets successfully scraped by prometheus.scrape\ncomponents.\n\nThis metric is calculated by dividing the number of targets\nsuccessfully scraped by the total number of targets scraped,\nacross all the namespaces in the selected cluster.\n\nLow success rates can indicate a problem with scrape targets,\nstale service discovery, or agent misconfiguration.\n", - "fieldConfig": { - "defaults": { - "unit": "percentunit" - } - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 1 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(up{cluster=\"$cluster\"})\n/\ncount (up{cluster=\"$cluster\"})\n", - "instant": false, - "legendFormat": "% of targets successfully scraped", - "range": true - } - ], - "title": "Scrape success rate in $cluster", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Duration of successful scrapes by prometheus.scrape components,\nacross all the namespaces in the selected cluster.\n\nThis metric should be below your configured scrape interval.\nHigh durations can indicate a problem with a scrape target or\na performance issue with the agent.\n", - "fieldConfig": { - "defaults": { - "unit": "s" - } - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 1 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "quantile(0.99, scrape_duration_seconds{cluster=\"$cluster\"})\n", - "instant": false, - "legendFormat": "p99", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "quantile(0.95, scrape_duration_seconds{cluster=\"$cluster\"})\n", - "instant": false, - "legendFormat": "p95", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "quantile(0.50, scrape_duration_seconds{cluster=\"$cluster\"})\n", - "instant": false, - "legendFormat": "p50", - "range": true - } - ], - 
"title": "Scrape duration in $cluster", - "type": "timeseries" - }, - { - "collapsed": false, - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 11 - }, - "title": "prometheus.remote_write", - "type": "row" - }, - { - "datasource": "${datasource}", - "description": "How far behind prometheus.remote_write from samples recently written\nto the WAL.\n\nEach endpoint prometheus.remote_write is configured to send metrics\nhas its own delay. The time shown here is the sum across all\nendpoints for the given component.\n\nIt is normal for the WAL delay to be within 1-3 scrape intervals. If\nthe WAL delay continues to increase beyond that amount, try\nincreasing the number of maximum shards.\n", - "fieldConfig": { - "defaults": { - "unit": "s" - } - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 0, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (instance, component_id) (\n prometheus_remote_storage_highest_timestamp_in_seconds{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\"}\n - ignoring(url, remote_name) group_right(instance)\n prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "WAL delay", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate of data containing samples and metadata sent by\nprometheus.remote_write.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 20, - "gradientMode": "hue", - "stacking": { - "mode": "normal" - } - }, - "unit": "Bps" - } - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 6, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum without (remote_name, url) (\n 
rate(prometheus_remote_storage_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval]) +\n rate(prometheus_remote_storage_metadata_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "Data write throughput", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Latency of writes to the remote system made by\nprometheus.remote_write.\n", - "fieldConfig": { - "defaults": { - "unit": "s" - } - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 12, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "histogram_quantile(0.99, sum by (le) (\n rate(prometheus_remote_storage_sent_batch_duration_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n))\n", - "instant": false, - "legendFormat": "99th percentile", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "histogram_quantile(0.50, sum by (le) (\n rate(prometheus_remote_storage_sent_batch_duration_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n))\n", - "instant": false, - "legendFormat": "50th percentile", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "sum(rate(prometheus_remote_storage_sent_batch_duration_seconds_sum{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\", component_id=~\"$component\"}[$__rate_interval])) /\nsum(rate(prometheus_remote_storage_sent_batch_duration_seconds_count{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\", component_id=~\"$component\"}[$__rate_interval]))\n", - 
"instant": false, - "legendFormat": "Average", - "range": true - } - ], - "title": "Write latency", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Total number of shards which are concurrently sending samples read\nfrom the Write-Ahead Log.\n\nShards are bound to a minimum and maximum, displayed on the graph.\nThe lowest minimum and the highest maximum across all clients is\nshown.\n\nEach client has its own set of shards, minimum shards, and maximum\nshards; filter to a specific URL to display more granular\ninformation.\n", - "fieldConfig": { - "defaults": { - "unit": "none" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Minimum" - }, - "properties": [ - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 15 - ], - "fill": "dash" - } - }, - { - "id": "custom.showPoints", - "value": "never" - }, - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": false, - "viz": false - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Maximum" - }, - "properties": [ - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 15 - ], - "fill": "dash" - } - }, - { - "id": "custom.showPoints", - "value": "never" - }, - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": false, - "viz": false - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 18, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum without (remote_name, url) (\n prometheus_remote_storage_shards{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "min (\n prometheus_remote_storage_shards_min{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}\n)\n", - "instant": 
false, - "legendFormat": "Minimum", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "max (\n prometheus_remote_storage_shards_max{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}\n)\n", - "instant": false, - "legendFormat": "Maximum", - "range": true - } - ], - "title": "Shards", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Total outgoing samples sent by prometheus.remote_write.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 20, - "gradientMode": "hue", - "stacking": { - "mode": "normal" - } - }, - "unit": "cps" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 22 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum without (url, remote_name) (\n rate(prometheus_remote_storage_samples_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "Sent samples / second", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate of samples which prometheus.remote_write could not send due to\nnon-recoverable errors.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 20, - "gradientMode": "hue", - "stacking": { - "mode": "normal" - } - }, - "unit": "cps" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 22 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum without (url,remote_name) (\n rate(prometheus_remote_storage_samples_failed_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "Failed samples / second", - 
"type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate of samples which prometheus.remote_write attempted to resend\nafter receiving a recoverable error.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 20, - "gradientMode": "hue", - "stacking": { - "mode": "normal" - } - }, - "unit": "cps" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 22 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum without (url,remote_name) (\n rate(prometheus_remote_storage_samples_retried_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "Retried samples / second", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Total number of active series across all components.\n\nAn \"active series\" is a series that prometheus.remote_write recently\nreceived a sample for. 
Active series are garbage collected whenever a\ntruncation of the WAL occurs.\n", - "fieldConfig": { - "defaults": { - "unit": "short" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 32 - }, - "options": { - "legend": { - "showLegend": false - } - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(agent_wal_storage_active_series{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"})\n", - "instant": false, - "legendFormat": "Series", - "range": true - } - ], - "title": "Active series (total)", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Total number of active series which are currently being tracked by\nprometheus.remote_write components, with separate lines for each agent instance.\n\nAn \"active series\" is a series that prometheus.remote_write recently\nreceived a sample for. Active series are garbage collected whenever a\ntruncation of the WAL occurs.\n", - "fieldConfig": { - "defaults": { - "unit": "short" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 32 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "agent_wal_storage_active_series{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id!=\"\", component_id=~\"$component\", url=~\"$url\"}\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "Active series (by instance/component)", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Total number of active series which are currently being tracked by\nprometheus.remote_write components, aggregated across all instances.\n\nAn \"active series\" is a series that prometheus.remote_write recently\nreceived a sample for. 
Active series are garbage collected whenever a\ntruncation of the WAL occurs.\n", - "fieldConfig": { - "defaults": { - "unit": "short" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 32 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (component_id) (agent_wal_storage_active_series{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id!=\"\", component_id=~\"$component\", url=~\"$url\"})\n", - "instant": false, - "legendFormat": "{{component_id}}", - "range": true - } - ], - "title": "Active series (by component)", - "type": "timeseries" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": "cluster", - "query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": "namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "allValue": ".*", - "datasource": "${datasource}", - "includeAll": true, - "label": "instance", - "multi": true, - "name": "instance", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n", - "refId": "instance" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "allValue": ".*", - "datasource": "${datasource}", - 
"includeAll": true, - "label": "component", - "multi": true, - "name": "component", - "query": { - "query": "label_values(agent_wal_samples_appended_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"prometheus\\\\.remote_write\\\\..*\"}, component_id)\n", - "refId": "component" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "allValue": ".*", - "datasource": "${datasource}", - "includeAll": true, - "label": "url", - "multi": true, - "name": "url", - "query": { - "query": "label_values(prometheus_remote_storage_sent_batch_duration_seconds_sum{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\"}, url)\n", - "refId": "url" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - "timezone": "utc", - "title": "Grafana Agent Flow / Prometheus Components", - "uid": "ee34ffa2d084547d650e1d96a26306aa" - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin - labels: - grafana_dashboard: "1" - name: agent-flow-prometheus-remote-write.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - agent-flow-resources.json: |- - { - "annotations": { - "list": [ - { - "datasource": "$loki_datasource", - "enable": true, - "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", - "iconColor": "rgba(0, 211, 255, 1)", - "instant": false, - "name": "Deployments", - "titleFormat": "{{cluster}}/{{namespace}}" - } - ] - }, - "graphTooltip": 1, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - 
"keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "${datasource}", - "description": "CPU usage of the Grafana Agent process relative to 1 CPU core.\n\nFor example, 100% means using one entire CPU core.\n", - "fieldConfig": { - "defaults": { - "unit": "percentunit" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(agent_resources_process_cpu_seconds_total{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[$__rate_interval])", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "CPU usage", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Resident memory size of the Grafana Agent process.\n", - "fieldConfig": { - "defaults": { - "unit": "decbytes" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "agent_resources_process_resident_memory_bytes{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "Memory (RSS)", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate at which the Grafana Agent process performs garbage collections.\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "points", - "pointSize": 3 - }, - "unit": "ops" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 8 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(go_gc_duration_seconds_count{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[5m])\nand on(instance)\nagent_build_info{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\n", - "instant": false, - "legendFormat": "{{instance}}", - 
"range": true - } - ], - "title": "Garbage collections", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Number of goroutines which are running in parallel. An infinitely\ngrowing number of these indicates a goroutine leak.\n", - "fieldConfig": { - "defaults": { - "unit": "none" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 8 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "go_goroutines{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\nand on(instance)\nagent_build_info{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\n", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "Goroutines", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Heap memory currently in use by the Grafana Agent process.\n", - "fieldConfig": { - "defaults": { - "unit": "decbytes" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 8 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\nand on(instance)\nagent_build_info{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\n", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "Memory (heap inuse)", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate of data received across all network interfaces for the machine\nGrafana Agent is running on.\n\nData shown here is across all running processes and not exclusive to\nthe running Grafana Agent process.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 30, - "gradientMode": "none", - "stacking": { - "mode": "normal" - } - }, - "unit": "Bps" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 16 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": 
"rate(agent_resources_machine_rx_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "Network receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate of data sent across all network interfaces for the machine\nGrafana Agent is running on.\n\nData shown here is across all running processes and not exclusive to\nthe running Grafana Agent process.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 30, - "gradientMode": "none", - "stacking": { - "mode": "normal" - } - }, - "unit": "Bps" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 16 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(agent_resources_machine_tx_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "Network send bandwidth", - "type": "timeseries" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": "cluster", - "query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": 
"namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "allValue": ".*", - "datasource": "${datasource}", - "includeAll": true, - "label": "instance", - "multi": true, - "name": "instance", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n", - "refId": "instance" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - "timezone": "utc", - "title": "Grafana Agent Flow / Resources", - "uid": "d47aae5c53be5550f8e3bc8a904ba61a" - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin - labels: - grafana_dashboard: "1" - name: agent-flow-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - go-runtime.json: |- - { - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "description": "Go runtime metrics", - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "id": 14, - "iteration": 1623758038990, - "links": [ ], - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average total bytes of memory reserved across all process instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 0 - }, - "hiddenSeries": false, - "id": 16, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": 
false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by(job)(go_memstats_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}} (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Total Reserved Memory", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average stack memory usage across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 0 - }, - "hiddenSeries": false, - "id": 24, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - 
"spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job) (go_memstats_stack_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: stack inuse (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Stack Memory Use", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average memory reservations by the runtime, not for stack or heap, across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 8 - }, - "hiddenSeries": false, - "id": 26, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(go_memstats_mspan_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{instance}}: mspan (avg)", - "refId": "B" - }, - { - "expr": "avg 
by (job)(go_memstats_mcache_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{instance}}: mcache (avg)", - "refId": "D" - }, - { - "expr": "avg by (job)(go_memstats_buck_hash_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{instance}}: buck hash (avg)", - "refId": "E" - }, - { - "expr": "avg by (job)(go_memstats_gc_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: gc (avg)", - "refId": "F" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Other Memory Reservations", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average memory reserved, and actually in use, by the heap, across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 8 - }, - "hiddenSeries": false, - "id": 12, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - 
"spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(go_memstats_heap_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: heap reserved (avg)", - "refId": "B" - }, - { - "expr": "avg by (job)(go_memstats_heap_inuse_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: heap in use (avg)", - "refId": "A" - }, - { - "expr": "avg by (job)(go_memstats_heap_alloc_bytes{job=~\"tns_app\",instance=~\".*\"})", - "interval": "", - "legendFormat": "{{job}}: heap alloc (avg)", - "refId": "C" - }, - { - "expr": "avg by (job)(go_memstats_heap_idle_bytes{job=~\"tns_app\",instance=~\".*\"})", - "interval": "", - "legendFormat": "{{job}}: heap idle (avg)", - "refId": "D" - }, - { - "expr": "avg by (job)(go_memstats_heap_released_bytes{job=~\"tns_app\",instance=~\".*\"})", - "interval": "", - "legendFormat": "{{job}}: heap released (avg)", - "refId": "E" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Heap Memory", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average allocation rate in bytes per second, across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 16 - }, - 
"hiddenSeries": false, - "id": 14, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 1, - "points": true, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(rate(go_memstats_alloc_bytes_total{job=\"$job\", instance=~\"$instance\"}[$__rate_interval]))", - "interval": "", - "legendFormat": "{{job}}: bytes malloced/s (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Allocation Rate, Bytes", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average rate of heap object allocation, across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 16 - }, - "hiddenSeries": false, - "id": 20, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - 
"alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(go_memstats_mallocs_total{job=\"$job\", instance=~\"$instance\"}[$__rate_interval])", - "interval": "", - "legendFormat": "{{job}}: obj mallocs/s (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Heap Object Allocation Rate", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average number of live memory objects across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 24 - }, - "hiddenSeries": false, - "id": 22, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": 
[ - { - "expr": "avg by(job)(go_memstats_mallocs_total{job=\"$job\", instance=~\"$instance\"} - go_memstats_frees_total{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: object count (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Number of Live Objects", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average number of goroutines across instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 24 - }, - "hiddenSeries": false, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(go_goroutines{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: goroutine count (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - 
"timeShift": null, - "title": "Goroutines", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 32 - }, - "hiddenSeries": false, - "id": 4, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(go_gc_duration_seconds{quantile=\"0\", job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: min gc time (avg)", - "refId": "A" - }, - { - "expr": "avg by (job)(go_gc_duration_seconds{quantile=\"1\", job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: max gc time (avg)", - "refId": "B" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "GC min & max duration", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - 
"xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "The number used bytes at which the runtime plans to perform the next GC, averaged across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 32 - }, - "hiddenSeries": false, - "id": 27, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(go_memstats_next_gc_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}} next gc bytes (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Next GC, Bytes", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "s", - "label": null, - 
"logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "refresh": "30s", - "schemaVersion": 30, - "style": "dark", - "tags": [ - "go-runtime" - ], - "templating": { - "list": [ - { - "current": { - "selected": false, - "text": "MONITORING", - "value": "MONITORING" - }, - "description": null, - "error": null, - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "datasource", - "options": [ ], - "query": "prometheus", - "queryValue": "", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" - }, - { - "allValue": null, - "current": { - "selected": false, - "text": "pilot", - "value": "pilot" - }, - "datasource": "$datasource", - "definition": "label_values(go_info, job)", - "description": null, - "error": null, - "hide": 0, - "includeAll": false, - "label": "job", - "multi": false, - "name": "job", - "options": [ ], - "query": { - "query": "label_values(go_info, job)", - "refId": "MONITORING-job-Variable-Query" - }, - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": "", - "current": { - "selected": false, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "definition": "label_values(go_info{job=\"$job\"}, instance)", - "description": null, - "error": null, - "hide": 0, - "includeAll": true, - "label": "instance", - "multi": true, - "name": "instance", - "options": [ ], - "query": { - "query": "label_values(go_info{job=\"$job\"}, instance)", - "refId": "MONITORING-instance-Variable-Query" - }, - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-30m", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", 
- "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Go runtime metrics", - "uid": "T4sSTLBGzgp", - "version": 1 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Go - Runtime - labels: - grafana_dashboard: "1" - name: go-runtime.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - datasources.yaml: | - apiVersion: 1 - - deleteDatasources: - - name: Metrics - uid: metrics - - name: Logs - uid: logs - - datasources: - # Mimir for metrics - - name: Metrics - type: prometheus - uid: metrics - access: proxy - url: http://nginx.gateway.svc.cluster.local:8080/prometheus - basicAuth: false - isDefault: true - version: 1 - editable: true - - # Loki for logs - - name: Logs - type: loki - uid: logs - access: proxy - url: http://nginx.gateway.svc.cluster.local:3100 - basicAuth: false - isDefault: true - version: 1 - editable: true -kind: ConfigMap -metadata: - labels: - grafana_datasource: "1" - name: grafana-datasources-5bcgmmbbkh - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-alertmanager-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - 
"mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": 
"A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - 
"steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alertmanager", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?alertmanager.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - 
}, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?alertmanager.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"alertmanager\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - 
"pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"alertmanager\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk reads", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Disk", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(alertmanager).*\"\n }\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - 
"30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Alertmanager resources", - "uid": "a6883fb22799ac74479c7db872451092", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-alertmanager-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-alertmanager.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "100px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cluster_job_pod:cortex_alertmanager_alerts:sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Total alerts", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, 
- "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cluster_job_pod:cortex_alertmanager_silences:sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Total silences", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - 
"links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max(cortex_alertmanager_tenants_discovered{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Tenants", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - 
}, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "QPS", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - 
"fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alertmanager Distributor", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - 
"panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "sum(cluster_job:cortex_alertmanager_alerts_received_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n-\nsum(cluster_job:cortex_alertmanager_alerts_invalid_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(cluster_job:cortex_alertmanager_alerts_invalid_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "APS", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alerts received", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - 
"lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "cortex_alertmanager_dispatcher_aggregation_groups{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "per pod Active Aggregation Groups", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alerts grouping", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(cluster_job_integration:cortex_alertmanager_notifications_total:rate5m{cluster=~\"$cluster\", 
job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n-\nsum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "NPS", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "(\nsum(cluster_job_integration:cortex_alertmanager_notifications_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}) by(integration)\n-\nsum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}) by(integration)\n) > 0\nor on () vector(0)\n", - "format": "time_series", - "legendFormat": "success - {{ integration }}", - "legendLink": null - }, - { - "expr": "sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}) by(integration)", - "format": "time_series", - 
"legendFormat": "failed - {{ integration }}", - "legendLink": null - } - ], - "title": "NPS by integration", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_alertmanager_notification_latency_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_alertmanager_notification_latency_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_alertmanager_notification_latency_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_alertmanager_notification_latency_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": 
"short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alert notifications", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - 
"title": "Error rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - 
"format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - 
"min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alertmanager Configuration Object Store (Alertmanager accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Get", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / 
sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: GetRange", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Upload", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (cortex_alertmanager_tenants_owned{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Per pod tenants", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - 
"showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (cluster_job_pod:cortex_alertmanager_alerts:sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Per pod alerts", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (cluster_job_pod:cortex_alertmanager_silences:sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Per pod silences", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Replication", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": 
false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_alertmanager_sync_configs_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_alertmanager_sync_configs_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_alertmanager_sync_configs_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Syncs/sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } 
- }, - "span": 4, - "targets": [ - { - "expr": "sum by(reason) (rate(cortex_alertmanager_sync_configs_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{reason}}", - "legendLink": null - } - ], - "title": "Syncs/sec (by reason)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum (rate(cortex_alertmanager_ring_check_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "errors", - "legendLink": null - } - ], - "title": "Ring check errors/sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Tenant configuration sync", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": 
"none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(outcome) (rate(cortex_alertmanager_state_initial_sync_completed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "interval": "1m", - "legendFormat": "{{outcome}}", - "legendLink": null - } - ], - "title": "Initial syncs /sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 26, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "interval": "1m", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "interval": "1m", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "interval": "1m", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Initial sync duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 27, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_alertmanager_state_fetch_replica_state_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_alertmanager_state_fetch_replica_state_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "interval": "1m", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_alertmanager_state_fetch_replica_state_failed_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "interval": "1m", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Fetch state from other alertmanagers /sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Sharding initial state sync", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 28, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(cluster_job:cortex_alertmanager_state_replication_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n-\nsum(cluster_job:cortex_alertmanager_state_replication_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(cluster_job:cortex_alertmanager_state_replication_failed_total:rate5m{cluster=~\"$cluster\", 
job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Replicate state to other alertmanagers /sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 29, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(cluster_job:cortex_alertmanager_partial_state_merges_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n-\nsum(cluster_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(cluster_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Merge state from other alertmanagers /sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - 
"fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 30, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_alertmanager_state_persist_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_alertmanager_state_persist_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_alertmanager_state_persist_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Persist state to remote storage /sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Sharding runtime state sync", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - 
"hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Alertmanager", - "uid": "b0d38d318bbddd80476246d4930f9e55", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-alertmanager.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-compactor-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ 
- { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", - "format": "time_series", - "legendFormat": "limit", - 
"legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "CPU and memory", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - 
"value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (RSS)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?compactor.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?compactor.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) 
group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"compactor\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"compactor\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk reads", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - 
"overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(compactor).*\"\n }\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Disk", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, 
namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Compactor resources", - "uid": "09a5c49e9cdb2f2b24c6d184574a07fd", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-compactor-resources.json + name: agent-config-9cc7gk9k2b namespace: monitoring-system --- apiVersion: v1 data: - mimir-compactor.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Per-instance runs\nNumber of times a compactor instance triggers a compaction across all tenants that it manages.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "bars", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "completed" - }, 
- "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "started" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#34CCEB", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_compactor_runs_started_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "started", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_compactor_runs_completed_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "completed", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_compactor_runs_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Per-instance runs / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Tenants compaction progress\nIn a multi-tenant cluster, display the progress of tenants that are compacted while compaction is running.\n\n", - "fieldConfig": { - "defaults": { - "max": 1, - "noValue": 1, - "unit": "percentunit" - } - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "(\n 
cortex_compactor_tenants_processing_succeeded{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"} +\n cortex_compactor_tenants_processing_failed{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"} +\n cortex_compactor_tenants_skipped{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}\n)\n/\ncortex_compactor_tenants_discovered{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"} > 0\n", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Tenants compaction progress", - "type": "timeseries" - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Longest time since last successful run\nDisplays the amount of time since the most recent successful execution\nof the compactor.\nThe value shown will be for the compactor replica that has the longest time since its\nlast successful run.\nThe table to the right shows a summary for all compactor replicas.\n\nIf there is no time value, one of the following messages might appear:\n\n- If you see \"No compactor data\" in this panel, that means that no compactors are active yet.\n\n- If you see \"No successful runs\" in this panel, that means that compactors are active, but none\n of them were successfully executed yet.\n\nThese might be expected - for example, if you just recently restarted your compactors,\nthey might not have had a chance to complete their first compaction run.\nHowever, if these messages persist, you should check the health of your compactors.\n\n", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "No compactor data", - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Last run" 
- }, - "properties": [ - { - "id": "custom.width", - "value": 74 - }, - { - "id": "mappings", - "value": [ - { - "options": { - "from": "-Infinity", - "result": { - "color": "text", - "text": "No successful runs since startup yet" - }, - "to": 0 - }, - "type": "range" - } - ] - }, - { - "id": "color", - "value": { - "mode": "thresholds" - } - }, - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "yellow", - "value": 7200 - }, - { - "color": "orange", - "value": 21600 - }, - { - "color": "red", - "value": 43200 - } - ] - } - } - ] - } - ] - }, - "fill": 1, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "reduceOptions": { - "calcs": [ - "first" - ], - "fields": "/^Last run$/", - "values": false - }, - "textMode": "value" - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max by(pod)\n(\n (time() * (max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[1h]) !=bool 0))\n -\n max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[1h])\n)\n", - "format": "table", - "instant": true, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Longest time since last successful run", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transformations": [ - { - 
"id": "organize", - "options": { - "renameByName": { - "Value": "Last run", - "pod": "Compactor" - } - } - }, - { - "id": "sortBy", - "options": { - "sort": [ - { - "desc": true, - "field": "Last run" - } - ] - } - } - ], - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Last successful run per-compactor replica\nDisplays the compactor replicas, and for each, shows how long it has been since\nits last successful compaction run.\n\nThe value in the status column is based on how long it has been since the last successful compaction.\n\n- Okay: less than 2 hours\n- Delayed: more than 2 hours\n- Late: more than 6 hours\n- Very late: more than 12 hours\n\nIf the status of any compactor replicas are *Late* or *Very late*, check their health.\n\n", - "fieldConfig": { - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Status" - }, - "properties": [ - { - "id": "custom.displayMode", - "value": "color-background" - }, - { - "id": "mappings", - "value": [ - { - "options": { - "from": "-Infinity", - "result": { - "color": "transparent", - "text": "N/A" - }, - "to": 0 - }, - "type": "range" - }, - { - "options": { - "from": 0, - "result": { - "color": "green", - "text": "Ok" - }, - "to": 7200 - }, - "type": "range" - }, - { - "options": { - "from": 7200, - "result": { - "color": "yellow", - "text": "Delayed" - }, - "to": 21600 - }, - "type": "range" - }, - { - "options": { - "from": 21600, - "result": { - "color": "orange", - "text": "Late" - }, - "to": 43200 - }, - "type": "range" - }, - { - "options": { - "from": 43200, - "result": { - "color": "red", - "text": "Very late" - }, - "to": "Infinity" - }, - 
"type": "range" - }, - { - "options": { - "match": "null+nan", - "result": { - "color": "transparent", - "text": "Unknown" - } - }, - "type": "special" - } - ] - }, - { - "id": "custom.width", - "value": 86 - }, - { - "id": "custom.align", - "value": "center" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Last run" - }, - "properties": [ - { - "id": "unit", - "value": "s" - }, - { - "id": "custom.width", - "value": 74 - }, - { - "id": "mappings", - "value": [ - { - "options": { - "from": "-Infinity", - "result": { - "text": "Never" - }, - "to": 0 - }, - "type": "range" - } - ] - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "max by(pod)\n(\n (time() * (max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[1h]) !=bool 0))\n -\n max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[1h])\n)\n", - "format": "table", - "instant": true, - "legendFormat": "Last run", - "legendLink": null - } - ], - "title": "Last successful run per-compactor replica", - "transformations": [ - { - "id": "organize", - "options": { - "renameByName": { - "Value": "Last run", - "pod": "Compactor" - } - } - }, - { - "id": "sortBy", - "options": { - "sort": [ - { - "desc": true, - "field": "Last run" - } - ] - } - }, - { - "id": "calculateField", - "options": { - "alias": "One", - "binary": { - "left": "Last run", - "operator": "/", - "right": "Last run" - }, - "mode": "binary", - "replaceFields": false - } - }, - { - "id": "calculateField", - "options": { - "alias": "Status", - "binary": { - "left": "Last run", - "operator": "*", - "right": "One" - }, - "mode": "binary", - "replaceFields": false - } - }, - { - 
"id": "filterFieldsByName", - "options": { - "include": { - "names": [ - "Compactor", - "Last run", - "Status" - ] - } - } - } - ], - "type": "table" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Summary", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Estimated Compaction Jobs\nEstimated number of compaction jobs based on latest version of bucket index. Ingesters upload new blocks every 2 hours (shortly after 01:00 UTC, 03:00 UTC, 05:00 UTC, etc.),\nand compactors should process all of them within 2h interval. If this graph regularly goes to zero (or close to zero) in 2 hour intervals, then compaction works as designed.\n\nMetric with number of compaction jobs is computed from blocks in bucket index, which is updated regularly. Metric doesn't change between bucket index updates, even if\nthere were compaction jobs finished in this time. When computing compaction jobs, only jobs that can be executed at given moment are counted. There can be more\njobs, but if they are blocked, they are not counted in the metric. For example if there is a split compaction job pending for some time range, no merge job\ncovering the same time range can run. 
In this case only split compaction job is counted toward the metric, but merge job isn't.\n\nIn other words, computed number of compaction jobs is the minimum number of compaction jobs based on latest version of bucket index.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(cortex_bucket_index_estimated_compaction_jobs{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}) and (sum(rate(cortex_bucket_index_estimated_compaction_jobs_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) == 0)", - "format": "time_series", - "legendFormat": "Jobs", - "legendLink": null - } - ], - "title": "Estimated Compaction Jobs", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### TSDB compactions / sec\nRate of TSDB compactions. 
Single TSDB compaction takes one or more input blocks and produces one or more (during \"split\" phase) output blocks.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(prometheus_tsdb_compactions_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "compactions", - "legendLink": null - } - ], - "title": "TSDB compactions / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### TSDB compaction duration\nDisplay the amount of time that it has taken to run a single TSDB compaction.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(prometheus_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": 
"A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(prometheus_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(prometheus_tsdb_compaction_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(prometheus_tsdb_compaction_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "TSDB compaction duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "avg(max by(user) (cortex_bucket_blocks_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}))", - "format": "time_series", - 
"legendFormat": "avg", - "legendLink": null - } - ], - "title": "Average blocks / tenant", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Tenants with largest number of blocks\nThe 10 tenants with the largest number of blocks.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "topk(10, max by(user) (cortex_bucket_blocks_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "Tenants with largest number of blocks", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_compactor_blocks_marked_for_deletion_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "blocks", - "legendLink": null - } - ], - "title": "Blocks marked for deletion / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_compactor_blocks_cleaned_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_compactor_block_cleanup_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Blocks deletions / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Garbage collector", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - 
"datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_compactor_meta_syncs_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_compactor_meta_sync_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_compactor_meta_sync_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Metadata syncs / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": 
[ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_compactor_meta_sync_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_compactor_meta_sync_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_compactor_meta_sync_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_compactor_meta_sync_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Metadata sync duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Metadata sync", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - 
"showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Error rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - 
"legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "nullPointMode": "null as zero", - 
"options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Object Store", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": 
"none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Get", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 
0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: GetRange", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - 
"min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Upload", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, 
- "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 21, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - 
"drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": 
"single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval])) * 1e3 / 
sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Key-value store for compactors ring", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", 
- "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Compactor", - "uid": "1b3443aea86db629e6efdb7d05c53823", - "version": 0 - } + MEMCACHED_SECRET_NAME: integrations-memcached + MYSQL_SECRET_NAME: integrations-mysql + REDIS_SECRET_NAME: integrations-redis + memcached.river: "/*\nModule: Memcached integrations\nDescription: Wrapper module + to integration Memcached metrics\n*/\nargument \"clustering\" {\n\t// comment + = \"Whether or not clustering should be enabled\"\n\toptional = true\n\tdefault + \ = true\n}\n\nargument \"name\" {\n\t// comment = \"Name of the secret for Memcached\"\n\toptional + = true\n\tdefault = \"integrations-memcached\"\n}\n\nargument \"namespace\" {\n\t// + comment = \"Namespace of the Memcached secret Integrations\"\n\toptional = true\n\tdefault + \ = \"default\"\n}\n\nargument \"instance\" {\n\t// comment = \"Instance of the + Memcached\"\n\toptional = true\n\tdefault = \"primary\"\n}\n\nargument \"forward_to\" + { }\n\nremote.kubernetes.secret \"memcached\" {\n\tname = argument.name.value\n\tnamespace + = argument.namespace.value\n}\n\n// Metrics\nprometheus.exporter.memcached \"integrations_memcached\" + {\n\taddress = nonsensitive(remote.kubernetes.secret.memcached.data[\"memcached-address\"])\n\ttimeout + = \"5s\"\n}\n\nprometheus.scrape \"memcached\" {\n\tclustering {\n\t\tenabled + = argument.clustering.value\n\t}\n\n\tenable_protobuf_negotiation = true\n\tscrape_classic_histograms + \ = true\n\n\ttargets = concat(\n\t\tprometheus.exporter.memcached.integrations_memcached.targets,\n\t)\n\tjob_name + \ = \"integrations/memcached\"\n\tforward_to = [prometheus.relabel.integrations_memcached.receiver]\n}\n\nprometheus.relabel + \"integrations_memcached\" {\n\trule {\n\t\treplacement = 
argument.instance.value\n\t\ttarget_label + = \"instance\"\n\t}\n\tforward_to = argument.forward_to.value\n}\n" + mysql.river: "/*\nModule: Mysql integrations\nDescription: Wrapper module to integration + mysql metrics\n*/\nargument \"clustering\" {\n\t// comment = \"Whether or not + clustering should be enabled\"\n\toptional = true\n\tdefault = true\n}\n\nargument + \"name\" {\n\t// comment = \"Name of the secret for MySQL\"\n\toptional = true\n\tdefault + \ = \"integrations-mysql\"\n}\n\nargument \"namespace\" {\n\t// comment = \"Namespace + of the MySQL secret Integrations\"\n\toptional = true\n\tdefault = \"default\"\n}\n\nargument + \"instance\" {\n\t// comment = \"Instance of the Database\"\n\toptional = true\n\tdefault + \ = \"primary\"\n}\n\nargument \"forward_to\" { }\n\nremote.kubernetes.secret + \"mysql\" {\n\tname = argument.name.value\n\tnamespace = argument.namespace.value\n}\n\n// + Metrics\nprometheus.exporter.mysql \"integrations_mysql\" {\n\tdata_source_name + = nonsensitive(remote.kubernetes.secret.mysql.data[\"mysql-username\"]) + \":\" + + nonsensitive(remote.kubernetes.secret.mysql.data[\"mysql-password\"]) + \"@(\" + + nonsensitive(remote.kubernetes.secret.mysql.data[\"mysql-host\"]) + \")/\"\n}\n\nprometheus.scrape + \"mysql\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\tenable_protobuf_negotiation + = true\n\tscrape_classic_histograms = true\n\n\ttargets = concat(\n\t\tprometheus.exporter.mysql.integrations_mysql.targets,\n\t)\n\tjob_name + \ = \"integrations/mysql\"\n\tforward_to = [prometheus.relabel.integrations_mysql.receiver]\n}\n\nprometheus.relabel + \"integrations_mysql\" {\n\trule {\n\t\treplacement = argument.instance.value\n\t\ttarget_label + = \"instance\"\n\t}\n\tforward_to = argument.forward_to.value\n}\n" + redis.river: "/*\nModule: Redis integrations\nDescription: Wrapper module to integration + Redis metrics\n*/\nargument \"clustering\" {\n\t// comment = \"Whether or not + clustering should be 
enabled\"\n\toptional = true\n\tdefault = true\n}\n\nargument + \"namespace\" {\n\t// comment = \"Namespace of the Redis secret Integrations\"\n\toptional + = true\n\tdefault = \"default\"\n}\n\nargument \"name\" {\n\t// comment = \"Name + of the secret for Redis\"\n\toptional = true\n\tdefault = \"integrations-redis\"\n}\n\nargument + \"instance\" {\n\t// comment = \"Instance of the Redis\"\n\toptional = true\n\tdefault + \ = \"master\"\n}\n\nargument \"forward_to\" { }\n\nremote.kubernetes.secret \"redis\" + {\n\tname = argument.name.value\n\tnamespace = argument.namespace.value\n}\n\n// + Metrics\nprometheus.exporter.redis \"integrations_redis\" {\n\tredis_addr = + nonsensitive(remote.kubernetes.secret.redis.data[\"redis-addr\"])\n\tredis_password + = nonsensitive(remote.kubernetes.secret.redis.data[\"redis-password\"])\n}\n\nprometheus.scrape + \"redis\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\tenable_protobuf_negotiation + = true\n\tscrape_classic_histograms = true\n\n\ttargets = concat(\n\t\tprometheus.exporter.redis.integrations_redis.targets,\n\t)\n\tjob_name + \ = \"integrations/redis\"\n\tforward_to = [prometheus.relabel.integrations_redis.receiver]\n}\n\nprometheus.relabel + \"integrations_redis\" {\n\trule {\n\t\treplacement = argument.instance.value\n\t\ttarget_label + = \"instance\"\n\t}\n\tforward_to = argument.forward_to.value\n}\n" kind: ConfigMap metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-compactor.json + name: agent-integrations namespace: monitoring-system --- apiVersion: v1 data: - mimir-config.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": 
true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "instances" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "count(cortex_config_hash{cluster=~\"$cluster\", namespace=~\"$namespace\"}) by (sha256)", - "format": "time_series", - "legendFormat": "sha256:{{sha256}}", - "legendLink": null - } - ], - "title": "Startup config file hashes", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Startup config file", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "instances" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "count(cortex_runtime_config_hash{cluster=~\"$cluster\", namespace=~\"$namespace\"}) by (sha256)", - "format": "time_series", - "legendFormat": 
"sha256:{{sha256}}", - "legendLink": null - } - ], - "title": "Runtime config file hashes", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Runtime config file", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Config", - "uid": "5d9d0b4724c0f80d68467088ec61e003", - "version": 0 - } + integrations.river: "/*\nModule: Agent 
integrations\nDescription: Wrapper module + to include auto loading integrations\n*/\nargument \"name\" {\n\t// comment = + \"Name of the integrations config\"\n\toptional = true\n\tdefault = \"agent-integrations\"\n}\n\nargument + \"namespace\" {\n\t// comment = \"Namespace of the integrations config\"\n\toptional + = true\n\tdefault = \"default\"\n}\n\nargument \"forward_to\" { }\n\nremote.kubernetes.configmap + \"integrations\" {\n\tname = argument.name.value\n\tnamespace = argument.namespace.value\n}\n\n/********************************************\n + * Integrations Mysql\n ********************************************/\nmodule.string + \"mysql\" {\n\tcontent = remote.kubernetes.configmap.integrations.data[\"mysql.river\"]\n\n\targuments + {\n\t\tnamespace = argument.namespace.value\n\t\tname = remote.kubernetes.configmap.integrations.data[\"MYSQL_SECRET_NAME\"]\n\t\tinstance + \ = \"primary\"\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n\n/********************************************\n + * Integrations Memcached\n ********************************************/\nmodule.string + \"memcached\" {\n\tcontent = remote.kubernetes.configmap.integrations.data[\"memcached.river\"]\n\n\targuments + {\n\t\tnamespace = argument.namespace.value\n\t\tname = remote.kubernetes.configmap.integrations.data[\"MEMCACHED_SECRET_NAME\"]\n\t\tinstance + \ = \"primary\"\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n\n/********************************************\n + * Integrations Redis\n ********************************************/\nmodule.string + \"redis\" {\n\tcontent = remote.kubernetes.configmap.integrations.data[\"redis.river\"]\n\n\targuments + {\n\t\tnamespace = argument.namespace.value\n\t\tname = remote.kubernetes.configmap.integrations.data[\"REDIS_SECRET_NAME\"]\n\t\tinstance + \ = \"master\"\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n" + lgtmp.river: "/********************************************\n * ARGUMENTS\n 
********************************************/\nargument + \"cluster\" {\n\toptional = true\n\tdefault = \"k3d-k3s-codelab\"\n}\n\nargument + \"tenant\" {\n\toptional = true\n\tdefault = \"anonymous\"\n}\n\nargument \"metrics_endpoint\" + {\n\toptional = true\n\tdefault = \"http://mimir:8080\"\n\t//comment = \"Where + to send collected metrics.\"\n}\n\nargument \"logs_endpoint\" {\n\toptional = + true\n\tdefault = \"http://loki:3100\"\n\t//comment = \"Where to send collected + logs.\"\n}\n\nargument \"traces_endpoint\" {\n\toptional = true\n\tdefault = + \"tempo:4317\"\n\t//comment = \"Where to send collected traces.\"\n}\n\nargument + \"profiles_endpoint\" {\n\toptional = true\n\tdefault = \"http://pyroscope:4040\"\n\t//comment + \ = \"Where to send collected profiles.\"\n}\n\n/********************************************\n + * EXPORTS\n ********************************************/\n\nexport \"metrics_receiver\" + {\n\tvalue = prometheus.remote_write.mimir.receiver\n}\n\nexport \"logs_receiver\" + {\n\tvalue = loki.write.loki.receiver\n}\n\nexport \"traces_receiver\" {\n\tvalue + = otelcol.exporter.otlp.tempo.input\n}\n\nexport \"profiles_receiver\" {\n\tvalue + = pyroscope.write.pyroscope.receiver\n}\n\n/********************************************\n + * Endpoints\n ********************************************/\n\n// Metrics\nprometheus.remote_write + \"mimir\" {\n\tendpoint {\n\t\turl = argument.metrics_endpoint.value + + \"/api/v1/push\"\n\t\tsend_native_histograms = true\n\t}\n\n\texternal_labels + = {\n\t\t\"scraped_by\" = \"grafana-agent\",\n\t\t\"cluster\" = argument.cluster.value,\n\t}\n}\n\n// + Logs\nloki.write \"loki\" {\n\tendpoint {\n\t\turl = argument.logs_endpoint.value + + \"/loki/api/v1/push\"\n\t\ttenant_id = argument.tenant.value\n\t}\n\n\texternal_labels + = {\n\t\t\"scraped_by\" = \"grafana-agent\",\n\t\t\"cluster\" = argument.cluster.value,\n\t}\n}\n\n// + Traces\notelcol.exporter.otlp \"tempo\" {\n\tclient {\n\t\tendpoint = 
argument.traces_endpoint.value\n\n\t\ttls + {\n\t\t\tinsecure = true\n\t\t\tinsecure_skip_verify = true\n\t\t}\n\t}\n}\n\n// + Profiles\npyroscope.write \"pyroscope\" {\n\tendpoint {\n\t\turl = argument.profiles_endpoint.value\n\t}\n\n\texternal_labels + = {\n\t\t\"scraped_by\" = \"grafana-agent\",\n\t\t\"cluster\" = argument.cluster.value,\n\t}\n}\n" + logs.river: "/*\nModule: logs\nDescription: Wrapper module to include all kubernetes + logging modules and use cri parsing\n*/\nargument \"forward_to\" {\n\t// comment + = \"Must be a list(LogsReceiver) where collected logs should be forwarded to\"\n\toptional + = false\n}\n\nargument \"tenant\" {\n\t// comment = \"The tenant to filter logs + to. This does not have to be the tenantId, this is the value to look for in the + logs.agent.grafana.com/tenant annotation, and this can be a regex.\"\n\toptional + = true\n\tdefault = \".*\"\n}\n\nargument \"keep_labels\" {\n\t// comment = \"List + of labels to keep before the log message is written to Loki\"\n\toptional = true\n\tdefault + \ = [\n\t\t\"app\",\n\t\t\"cluster\",\n\t\t\"component\",\n\t\t\"container\",\n\t\t\"deployment\",\n\t\t\"env\",\n\t\t\"filename\",\n\t\t\"instance\",\n\t\t\"job\",\n\t\t\"level\",\n\t\t\"log_type\",\n\t\t\"namespace\",\n\t\t\"region\",\n\t\t\"service\",\n\t\t\"squad\",\n\t\t\"team\",\n\t]\n}\n\nargument + \"git_repo\" {\n\toptional = true\n\tdefault = coalesce(env(\"GIT_REPO\"), \"https://github.com/grafana/agent-modules.git\")\n}\n\nargument + \"git_rev\" {\n\toptional = true\n\tdefault = coalesce(env(\"GIT_REV\"), env(\"GIT_REVISION\"), + env(\"GIT_BRANCH\"), \"main\")\n}\n\nargument \"git_pull_freq\" {\n\t// comment + = \"How often to pull the git repo, the default is 0s which means never pull\"\n\toptional + = true\n\tdefault = \"0s\"\n}\n\nmodule.git \"log_targets\" {\n\trepository = + argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = 
\"modules/kubernetes/logs/targets/logs-from-worker.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.log_formats_all.exports.process.receiver]\n\t\ttenant + \ = argument.tenant.value\n\t\tgit_repo = argument.git_repo.value\n\t\tgit_rev + \ = argument.git_rev.value\n\t\tgit_pull_freq = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git + \"log_formats_all\" {\n\trepository = argument.git_repo.value\n\trevision + \ = argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/log-formats/all.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.log_level_default.exports.process.receiver]\n\t\tgit_repo + \ = argument.git_repo.value\n\t\tgit_rev = argument.git_rev.value\n\t\tgit_pull_freq + = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git \"log_level_default\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/labels/log-level.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.label_normalize_filename.exports.process.receiver]\n\t}\n}\n\nmodule.git + \"label_normalize_filename\" {\n\trepository = argument.git_repo.value\n\trevision + \ = argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/labels/normalize-filename.river\"\n\n\targuments + {\n\t\t// here we fork, one branch goes to the log level module, the other goes + to the metrics module\n\t\t// this is because we need to reduce the labels on + the pre-metrics but they are still necessary in\n\t\t// downstream modules\n\t\tforward_to + = [\n\t\t\tmodule.git.pre_process_metrics.exports.process.receiver,\n\t\t\tmodule.git.drop_levels.exports.process.receiver,\n\t\t]\n\t}\n}\n\nmodule.git + \"pre_process_metrics\" {\n\trepository = argument.git_repo.value\n\trevision + \ = argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = 
\"modules/kubernetes/logs/metrics/pre-process-bytes-lines.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.drop_levels.exports.process.receiver]\n\t\tkeep_labels + = argument.keep_labels.value\n\t}\n}\n\nmodule.git \"drop_levels\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/drops/levels.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.scrub_all.exports.process.receiver]\n\t\tgit_repo + \ = argument.git_repo.value\n\t\tgit_rev = argument.git_rev.value\n\t\tgit_pull_freq + = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git \"scrub_all\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/scrubs/all.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.embed_pod.exports.process.receiver]\n\t\tgit_repo + \ = argument.git_repo.value\n\t\tgit_rev = argument.git_rev.value\n\t\tgit_pull_freq + = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git \"embed_pod\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/embed/pod.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.mask_all.exports.process.receiver]\n\t}\n}\n\nmodule.git + \"mask_all\" {\n\trepository = argument.git_repo.value\n\trevision = + argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/masks/all.river\"\n\n\targuments {\n\t\tforward_to + \ = [module.git.label_keep.exports.process.receiver]\n\t\tgit_repo = argument.git_repo.value\n\t\tgit_rev + \ = argument.git_rev.value\n\t\tgit_pull_freq = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git + \"label_keep\" {\n\trepository = argument.git_repo.value\n\trevision = + argument.git_rev.value\n\tpull_frequency = 
argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/labels/keep-labels.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.post_process_metrics.exports.process.receiver]\n\t\tkeep_labels + = argument.keep_labels.value\n\t}\n}\n\nmodule.git \"post_process_metrics\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/metrics/post-process-bytes-lines.river\"\n\n\targuments + {\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n" + metrics.river: "/*\nModule: metrics-all\nDescription: Wrapper module to include + all kubernetes metric modules and use cri parsing\n*/\nargument \"forward_to\" + {\n\t// comment = \"Must be a list(MetricssReceiver) where collected logs should + be forwarded to\"\n\toptional = false\n}\n\nargument \"clustering\" {\n\t// comment + = \"Whether or not clustering should be enabled\"\n\toptional = true\n\tdefault + \ = false\n}\n\n/********************************************\n * Kubernetes Auto + Scrape ServiceMonitor\n ********************************************/\nprometheus.operator.servicemonitors + \"auto_scrape_servicemonitors\" {\n\tforward_to = argument.forward_to.value\n\n\tclustering + {\n\t\tenabled = argument.clustering.value\n\t}\n}\n\n/********************************************\n + * Kubernetes Auto Scrape PodMonitors\n ********************************************/\nprometheus.operator.podmonitors + \"auto_scrape_podmonitors\" {\n\tforward_to = argument.forward_to.value\n\n\tclustering + {\n\t\tenabled = argument.clustering.value\n\t}\n\n\tselector {\n\t\tmatch_expression + {\n\t\t\tkey = \"team\"\n\t\t\toperator = \"In\"\n\t\t\tvalues = [\"team-infra\"]\n\t\t}\n\t}\n}\n\n/********************************************\n + * Kubernetes Prometheus Rules To Mimir\n ********************************************/\nmimir.rules.kubernetes + \"prometheus_rules_to_mimir\" {\n\taddress = 
coalesce(env(\"METRICS_ENDPOINT\"), + \"http://nginx.gateway.svc:8080\")\n\ttenant_id = \"anonymous\"\n}\n" + profiles.river: "/*\nModule: profiles\nDescription: Wrapper module to include all + kubernetes profile modules and use cri parsing\n*/\nargument \"forward_to\" {\n\t// + comment = \"Must be a list(ProfilessReceiver) where collected logs should be forwarded + to\"\n\toptional = false\n}\n\nargument \"clustering\" {\n\t// comment = \"Whether + or not clustering should be enabled\"\n\toptional = true\n\tdefault = false\n}\n\ndiscovery.kubernetes + \"pyroscope_kubernetes\" {\n\trole = \"pod\"\n}\n\n// The default scrape config + allows to define annotations based scraping.\n//\n// For example the following + annotations:\n//\n// ```\n// profiles.grafana.com/memory.scrape: \"true\"\n// + profiles.grafana.com/memory.port: \"8080\"\n// profiles.grafana.com/cpu.scrape: + \"true\"\n// profiles.grafana.com/cpu.port: \"8080\"\n// profiles.grafana.com/goroutine.scrape: + \"true\"\n// profiles.grafana.com/goroutine.port: \"8080\"\n// ```\n//\n// will + scrape the `memory`, `cpu` and `goroutine` profiles from the `8080` port of the + pod.\n//\n// For more information see https://grafana.com/docs/phlare/latest/operators-guide/deploy-kubernetes/#optional-scrape-your-own-workloads-profiles\ndiscovery.relabel + \"kubernetes_pods\" {\n\ttargets = concat(discovery.kubernetes.pyroscope_kubernetes.targets)\n\n\trule + {\n\t\taction = \"drop\"\n\t\tsource_labels = [\"__meta_kubernetes_pod_phase\"]\n\t\tregex + \ = \"Pending|Succeeded|Failed|Completed\"\n\t}\n\n\trule {\n\t\taction + = \"labelmap\"\n\t\tregex = \"__meta_kubernetes_pod_label_(.+)\"\n\t}\n\n\trule + {\n\t\taction = \"replace\"\n\t\tsource_labels = [\"__meta_kubernetes_namespace\"]\n\t\ttarget_label + \ = \"namespace\"\n\t}\n\n\trule {\n\t\taction = \"replace\"\n\t\tsource_labels + = [\"__meta_kubernetes_pod_name\"]\n\t\ttarget_label = \"pod\"\n\t}\n\n\trule + {\n\t\taction = \"replace\"\n\t\tsource_labels = 
[\"__meta_kubernetes_pod_container_name\"]\n\t\ttarget_label + \ = \"container\"\n\t}\n}\n\ndiscovery.relabel \"kubernetes_pods_memory_default_name\" + {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_memory_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = 
[\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Memory\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_memory\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_memory_default_name.output, discovery.relabel.kubernetes_pods_memory_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = true\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_cpu_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = 
[\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_cpu_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = 
\"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape CPU\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_cpu\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_cpu_default_name.output, discovery.relabel.kubernetes_pods_cpu_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_goroutine_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port\"]\n\t\taction + \ = 
\"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_goroutine_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Goroutine\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_goroutine\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_goroutine_default_name.output, + discovery.relabel.kubernetes_pods_goroutine_custom_name.output)\n\tforward_to + = 
argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_block_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_block_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port_name\"]\n\t\taction + \ = 
\"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Block\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_block\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_block_default_name.output, discovery.relabel.kubernetes_pods_block_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_mutex_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = 
[\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_mutex_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = 
[\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Mutex\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_mutex\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_mutex_default_name.output, discovery.relabel.kubernetes_pods_mutex_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_fgprof_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = 
[\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_fgprof_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Fgprof\n 
********************************************/\npyroscope.scrape + \"pyroscope_scrape_fgprof\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_fgprof_default_name.output, discovery.relabel.kubernetes_pods_fgprof_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = true\n\t\t}\n\t}\n}\n" + traces.river: "/*\nModule: traces\n*/\n\n/********************************************\n + * ARGUMENTS\n ********************************************/\nargument \"traces_forward_to\" + {\n\toptional = false\n}\n\nargument \"logs_forward_to\" {\n\toptional = false\n}\n\nargument + \"metrics_forward_to\" {\n\toptional = false\n}\n\nargument \"cluster\" {\n\toptional + = true\n\tdefault = \"k3d-k3s-codelab\"\n}\n\nargument \"otlp_http_endpoint\" + {\n\toptional = true\n\tdefault = \"0.0.0.0:4318\"\n}\n\nargument \"otlp_grpc_endpoint\" + {\n\toptional = true\n\tdefault = \"0.0.0.0:4317\"\n}\n\n/********************************************\n + * EXPORTS\n ********************************************/\nexport \"agent_traces_input\" + {\n\tvalue = otelcol.processor.batch.default.input\n}\n\n/********************************************\n + * Jaeger for Metrics Logs Traces\n ********************************************/\n\notelcol.receiver.jaeger + \"default\" {\n\tprotocols {\n\t\tgrpc {\n\t\t\tendpoint = \"0.0.0.0:14250\"\n\t\t}\n\n\t\tthrift_http + {\n\t\t\tendpoint = \"0.0.0.0:14268\"\n\t\t}\n\n\t\tthrift_binary {\n\t\t\tendpoint + = \"0.0.0.0:6832\"\n\t\t}\n\n\t\tthrift_compact {\n\t\t\tendpoint = \"0.0.0.0:6831\"\n\t\t}\n\t}\n\n\toutput + {\n\t\tmetrics = 
[otelcol.processor.batch.default.input]\n\t\tlogs = [otelcol.processor.resourcedetection.default.input]\n\t\ttraces + \ = [otelcol.processor.resourcedetection.default.input]\n\t}\n}\n\n/********************************************\n + * Otelcol for Metrics Logs Traces\n ********************************************/\n// + https://grafana.com/docs/agent/latest/flow/reference/components/otelcol.receiver.otlp/\notelcol.receiver.otlp + \"default\" {\n\tgrpc {\n\t\tendpoint = argument.otlp_grpc_endpoint.value\n\t}\n\n\thttp + {\n\t\tendpoint = argument.otlp_http_endpoint.value\n\t}\n\n\toutput {\n\t\tmetrics + = [otelcol.processor.batch.default.input]\n\t\tlogs = [otelcol.processor.resourcedetection.default.input]\n\t\ttraces + \ = [\n\t\t\totelcol.processor.resourcedetection.default.input,\n\t\t\totelcol.connector.spanlogs.autologging.input,\n\t\t]\n\t}\n}\n\notelcol.processor.resourcedetection + \"default\" {\n\tdetectors = [\"env\"]\n\n\toutput {\n\t\tlogs = [otelcol.processor.k8sattributes.default.input]\n\t\ttraces + = [otelcol.processor.k8sattributes.default.input]\n\t}\n}\n\notelcol.processor.k8sattributes + \"default\" {\n\textract {\n\t\tmetadata = [\n\t\t\t\"k8s.namespace.name\",\n\t\t\t\"k8s.pod.name\",\n\t\t\t\"k8s.deployment.name\",\n\t\t\t\"k8s.statefulset.name\",\n\t\t\t\"k8s.daemonset.name\",\n\t\t\t\"k8s.cronjob.name\",\n\t\t\t\"k8s.job.name\",\n\t\t\t\"k8s.node.name\",\n\t\t\t\"k8s.pod.uid\",\n\t\t\t\"k8s.pod.start_time\",\n\t\t]\n\t}\n\n\tpod_association + {\n\t\tsource {\n\t\t\tfrom = \"connection\"\n\t\t}\n\t}\n\n\toutput {\n\t\tlogs + \ = [otelcol.processor.transform.add_resource_attributes.input]\n\t\ttraces = + [otelcol.processor.transform.add_resource_attributes.input]\n\t}\n}\n\notelcol.processor.transform + \"add_resource_attributes\" {\n\terror_mode = \"ignore\"\n\n\tlog_statements {\n\t\tcontext + \ = \"resource\"\n\t\tstatements = [\n\t\t\t`set(attributes[\"pod\"], attributes[\"k8s.pod.name\"])`,\n\t\t\t`set(attributes[\"namespace\"], + 
attributes[\"k8s.namespace.name\"])`,\n\t\t\t`set(attributes[\"loki.resource.labels\"], + \"pod, namespace, cluster, job\")`,\n\t\t\t`set(attributes[\"k8s.cluster.name\"], + \"k3d-k3s-codelab\") where attributes[\"k8s.cluster.name\"] == nil`,\n\t\t]\n\t}\n\n\ttrace_statements + {\n\t\tcontext = \"resource\"\n\t\tstatements = [\n\t\t\t`set(attributes[\"k8s.cluster.name\"], + \"k3d-k3s-codelab\") where attributes[\"k8s.cluster.name\"] == nil`,\n\t\t]\n\t}\n\n\toutput + {\n\t\tlogs = [otelcol.processor.filter.default.input]\n\t\ttraces = [otelcol.processor.filter.default.input]\n\t}\n}\n\notelcol.processor.filter + \"default\" {\n\terror_mode = \"ignore\"\n\n\toutput {\n\t\tlogs = [otelcol.processor.batch.default.input]\n\t\ttraces + = [otelcol.processor.batch.default.input]\n\t}\n}\n\notelcol.processor.batch \"default\" + {\n\tsend_batch_size = 16384\n\tsend_batch_max_size = 0\n\ttimeout = + \"5s\"\n\n\toutput {\n\t\tmetrics = [otelcol.processor.memory_limiter.default.input]\n\t\tlogs + \ = [otelcol.processor.memory_limiter.default.input]\n\t\ttraces = [otelcol.processor.memory_limiter.default.input]\n\t}\n}\n\notelcol.processor.memory_limiter + \"default\" {\n\tcheck_interval = \"1s\"\n\tlimit_percentage = 50\n\tspike_limit_percentage + = 30\n\n\toutput {\n\t\tmetrics = [otelcol.exporter.prometheus.tracesmetrics.input]\n\t\tlogs + \ = [otelcol.exporter.loki.traceslogs.input]\n\t\ttraces = argument.traces_forward_to.value\n\t}\n}\n\notelcol.exporter.prometheus + \"tracesmetrics\" {\n\tforward_to = argument.metrics_forward_to.value\n}\n\notelcol.exporter.loki + \"traceslogs\" {\n\tforward_to = [loki.process.traceslogs.receiver]\n}\n\n// The + OpenTelemetry spanlog connector processes incoming trace spans and extracts data + from them ready\n// for logging.\notelcol.connector.spanlogs \"autologging\" {\n\t// + We only want to output a line for each root span (ie. 
every single trace), and + not for every\n\t// process or span (outputting a line for every span would be + extremely verbose).\n\tspans = false\n\troots = true\n\tprocesses = false\n\n\t// + We want to ensure that the following three span attributes are included in the + log line, if present.\n\tspan_attributes = [\n\t\t\"http.method\",\n\t\t\"http.target\",\n\t\t\"http.status_code\",\n\t]\n\n\t// + Overrides the default key in the log line to be `traceId`, which is then used + by Grafana to\n\t// identify the trace ID for correlation with the Tempo datasource.\n\toverrides + {\n\t\ttrace_id_key = \"traceId\"\n\t}\n\n\t// Send to the OpenTelemetry Loki + exporter.\n\toutput {\n\t\tlogs = [otelcol.exporter.loki.autologging.input]\n\t}\n}\n\n// + Simply forwards the incoming OpenTelemetry log format out as a Loki log.\n// We + need this stage to ensure we can then process the logline as a Loki object.\notelcol.exporter.loki + \"autologging\" {\n\tforward_to = [loki.process.autologging.receiver]\n}\n\n// + The Loki processor allows us to accept a correctly formatted Loki log and mutate + it into\n// a set of fields for output.\nloki.process \"autologging\" {\n\t// + The JSON stage simply extracts the `body` (the actual logline) from the Loki log, + ignoring\n\t// all other fields.\n\tstage.json {\n\t\texpressions = {\"body\" + = \"\"}\n\t}\n\t// The output stage takes the body (the main logline) and uses + this as the source for the output\n\t// logline. 
In this case, it essentially + turns it into logfmt.\n\tstage.output {\n\t\tsource = \"body\"\n\t}\n\n\tforward_to + = [loki.process.traceslogs.receiver]\n}\n\nloki.process \"traceslogs\" {\n\tstage.tenant + {\n\t\tvalue = \"anonymous\"\n\t}\n\n\tforward_to = argument.logs_forward_to.value\n}\n" kind: ConfigMap metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-config.json + name: agent-modules-cf8t5bf7t9 namespace: monitoring-system --- apiVersion: v1 @@ -10712,32911 +877,120 @@ metadata: --- apiVersion: v1 data: - mimir-object-store.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(component) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{component}}", - "legendLink": null - } - ], - "title": "RPS / 
component", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(component) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) / sum by(component) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{component}}", - "legendLink": null - } - ], - "title": "Error rate / component", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Components", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "RPS / operation", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - 
"unit": "percentunit" - } - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Error rate / operation", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Operations", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - 
"legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: Get", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: GetRange", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", 
namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", 
namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: Upload", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval])) * 1e3 / 
sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - 
"refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Object Store", - "uid": "e1324ee2a434f4158c00a9ee279d3292", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-object-store.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-overrides.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "${datasource}", - "id": 1, - "span": 12, - "targets": [ - { - "expr": "max by(limit_name) (cortex_limits_defaults{cluster=~\"$cluster\",namespace=~\"$namespace\"})", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "title": "Defaults", - "transformations": [ - { - "id": "labelsToFields", - "options": { } - }, - { - "id": "merge", - "options": { } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true - }, - "indexByName": { - "Value": 1, - "limit_name": 0 - } - } - }, - { - "id": "sortBy", - "options": { - "fields": { }, - "sort": [ - { - "field": "limit_name" - } - ] - } - } - ], - "type": "table" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { 
- "datasource": "${datasource}", - "id": 2, - "span": 12, - "targets": [ - { - "expr": "max by(user, limit_name) (cortex_limits_overrides{cluster=~\"$cluster\",namespace=~\"$namespace\",user=~\"${tenant_id}\"})", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "title": "Per-tenant overrides", - "transformations": [ - { - "id": "labelsToFields", - "options": { - "mode": "columns", - "valueLabel": "limit_name" - } - }, - { - "id": "merge", - "options": { } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true - }, - "indexByName": { - "user": 0 - } - } - } - ], - "type": "table" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "current": { 
- "selected": true, - "text": ".*", - "value": ".*" - }, - "hide": 0, - "label": "Tenant ID", - "name": "tenant_id", - "options": [ - { - "selected": true, - "text": ".*", - "value": ".*" - } - ], - "query": ".*", - "type": "textbox" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Overrides", - "uid": "1e2c358600ac53f09faea133f811b5bb", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-overrides.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-overview-networking.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - 
"targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Writes", - 
"titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": 
"$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", - "format": 
"time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Reads", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - 
"fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - 
"repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Backend", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Overview networking", - "uid": "e15c71d372cc541367a088f10d9fcd92", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-overview-networking.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - 
mimir-overview-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - 
"span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Writes", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, 
device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"distributor|ingester|mimir-write\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"distributor|ingester|mimir-write\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk reads", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": 
"A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(distributor|ingester|mimir-write).*\"\n }\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "CPU", - 
"type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Reads", - "titleSize": "h6" - }, - { - "collapse": false, - 
"height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - 
"defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Backend", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n 
container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk reads", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - 
}, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"\n }\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": 
"namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Overview resources", - "uid": "a9b92d3c4d1af325d872a9e9a7083d71", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-overview-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-overview.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "content": "The 'Status' panel shows an overview on the cluster health over the time.\nTo investigate failures, see a specific dashboard:\n\n- Writes\n- Reads\n- Rule evaluations\n- Alerting notifications\n- Object storage\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 3, - "title": "", - "transparent": true, - "type": "text" - }, - { - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "thresholds": { - "mode": "absolute", - "steps": [ - { - 
"color": "#7EB26D", - "value": null - }, - { - "color": "#EAB839", - "value": 0.01 - }, - { - "color": "#E24D42", - "value": 0.050000000000000003 - } - ] - } - } - }, - "id": 2, - "options": { - "showValue": "never" - }, - "span": 6, - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "exemplar": false, - "expr": "(\n # gRPC errors are not tracked as 5xx but \"error\".\n sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"5.*|error\"}[$__rate_interval]))\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", - "instant": false, - "legendFormat": "Writes", - "range": true - }, - { - "datasource": { - "uid": "$datasource" - }, - "exemplar": false, - "expr": "(\n sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"5.*\"}[$__rate_interval]))\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", - "instant": false, - "legendFormat": "Reads", - "range": true - }, - { - "datasource": { - "uid": "$datasource" - }, - "exemplar": false, - "expr": "(\n (\n sum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n +\n # Consider 
missed evaluations as failures.\n sum(rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n )\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "instant": false, - "legendFormat": "Rule evaluations", - "range": true - }, - { - "datasource": { - "uid": "$datasource" - }, - "exemplar": false, - "expr": "(\n # Failed notifications from ruler to Alertmanager (handling the case the ruler metrics are missing).\n ((sum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n) or vector(0))\n +\n # Failed notifications from Alertmanager to receivers (handling the case the alertmanager metrics are missing).\n ((sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n) or vector(0))\n)\n/\n(\n # Total notifications from ruler to Alertmanager (handling the case the ruler metrics are missing).\n ((sum(rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n) or vector(0))\n +\n # Total notifications from Alertmanager to receivers (handling the case the alertmanager metrics are missing).\n ((sum(cluster_job_integration:cortex_alertmanager_notifications_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n) or vector(0))\n)\n", - "instant": false, - "legendFormat": "Alerting notifications", - "range": true - }, - { - "datasource": { - "uid": "$datasource" - }, - "exemplar": false, - "expr": 
"sum(rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n/\nsum(rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n", - "instant": false, - "legendFormat": "Object storage", - "range": true - } - ], - "title": "Status", - "type": "state-timeline" - }, - { - "id": 3, - "options": { - "alertInstanceLabelFilter": "cluster=~\"$cluster\", namespace=~\"$namespace\"", - "alertName": "Mimir", - "dashboardAlerts": false, - "maxItems": 100, - "sortOrder": 3, - "stateFilter": { - "error": true, - "firing": true, - "noData": false, - "normal": false, - "pending": false - } - }, - "span": 3, - "title": "Firing alerts", - "type": "alertlist" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Mimir cluster health", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "content": "These panels show an overview on the write path. 
\nTo examine the write path in detail, see a specific dashboard:\n\n- Writes\n- Writes resources\n- Writes networking\n- Overview resources\n- Overview networking\n", - "datasource": null, - "description": "", - "id": 4, - "mode": "markdown", - "span": 3, - "title": "", - "transparent": true, - "type": "text" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Write requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th 
percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Write latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "cps" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_samples:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "samples / sec", - "legendLink": null - }, - { - "expr": 
"sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "exemplars / sec", - "legendLink": null - } - ], - "title": "Ingestion / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Writes", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "content": "These panels show an overview on the read path. \nTo examine the read path in detail, see a specific dashboard:\n\n- Reads\n- Reads resources\n- Reads networking\n- Overview resources\n- Overview networking\n- Queries\n- Compactor\n", - "datasource": null, - "description": "", - "id": 8, - "mode": "markdown", - "span": 3, - "title": "", - "transparent": true, - "type": "text" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - 
}, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Read requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 10, - 
"links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Read latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, 
- "seriesOverrides": [ - { - "alias": "instant queries", - "color": "#429D48" - }, - { - "alias": "range queries", - "color": "#F1C731" - }, - { - "alias": "\"label names\" queries", - "color": "#2A66CF" - }, - { - "alias": "\"label values\" queries", - "color": "#9E44C1" - }, - { - "alias": "series queries", - "color": "#FFAB57" - }, - { - "alias": "remote read queries", - "color": "#C79424" - }, - { - "alias": "metadata queries", - "color": "#84D586" - }, - { - "alias": "exemplar queries", - "color": "#A1C4FC" - }, - { - "alias": "\"active series\" queries", - "color": "#C788DE" - } - ], - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_query\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "instant queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_query_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "range queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_labels\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "\"label names\" queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_label_name_values\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "\"label values\" queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_series\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "series queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_read\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "remote read queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_metadata\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "metadata queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_query_exemplars\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "exemplar queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=\"prometheus_api_v1_cardinality_active_series\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "\"active series\" queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=\"prometheus_api_v1_cardinality_label_names\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "\"label name cardinality\" queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=\"prometheus_api_v1_cardinality_label_values\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "\"label value cardinality\" queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_.*\",route!~\".*(query|query_range|label.*|series|read|metadata|query_exemplars|cardinality_.*)\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "other", - "legendLink": null - } - ], - "title": "Queries / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Reads", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "content": "These panels show an overview on the recording and alerting rules evaluation.\nTo examine the rules evaluation and alerts notifications in detail, see a specific dashboard:\n\n- Ruler\n- Alertmanager\n- Alertmanager resources\n- Overview resources\n- Overview networking\n", - "datasource": null, - "description": "", - "id": 12, - "mode": "markdown", - "span": 3, - "title": "", - "transparent": true, - "type": "text" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - 
"id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "success", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "missed", - "legendLink": null - } - ], - "title": "Rule evaluations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum (rate(cortex_prometheus_rule_evaluation_duration_seconds_sum{cluster=~\"$cluster\", 
job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum (rate(cortex_prometheus_rule_evaluation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "average", - "legendLink": null - } - ], - "title": "Rule evaluations latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n -\nsum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "failed", - 
"legendLink": null - } - ], - "title": "Alerting notifications sent to Alertmanager / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Recording and alerting rules", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "content": "These panels show an overview on the long-term storage (object storage).\nTo examine the storage in detail, see a specific dashboard:\n\n- Object store\n- Compactor\n", - "datasource": null, - "description": "", - "id": 16, - "mode": "markdown", - "span": 3, - "title": "", - "transparent": true, - "type": "text" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n-\nsum(rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": 
"sum(rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "seriesOverrides": [ - { - "alias": "attributes", - "color": "#429D48" - }, - { - "alias": "delete", - "color": "#F1C731" - }, - { - "alias": "exists", - "color": "#2A66CF" - }, - { - "alias": "get", - "color": "#9E44C1" - }, - { - "alias": "get_range", - "color": "#FFAB57" - }, - { - "alias": "iter", - "color": "#C79424" - }, - { - "alias": "upload", - "color": "#84D586" - } - ], - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - 
"tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(max by(user) (max_over_time(cortex_bucket_blocks_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[15m])))", - "format": "time_series", - "legendFormat": "blocks", - "legendLink": null - } - ], - "title": "Total number of blocks in the storage", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Long-term storage (object storage)", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", 
- "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Overview", - "uid": "ffcd83628d7d4b5a03d1cafd159e6c9c", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-overview.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-queries.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, 
sum(rate(cortex_query_frontend_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_query_frontend_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_frontend_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Queue duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_query_frontend_retries_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, 
sum(rate(cortex_query_frontend_retries_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_query_frontend_retries_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_query_frontend_retries_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Retries", - "type": "timeseries", - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (cortex_query_frontend_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Queue length (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - 
"showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(user) (cortex_query_frontend_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}) > 0", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "Queue length (per user)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Queue duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Queue length (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": 
"A", - "mode": "none" - } - }, - "min": 0, - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(user) (cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}) > 0", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "Queue length (per user)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Intervals per query\nThe average number of split queries (partitioned by time) executed a single input query.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_frontend_split_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) / sum(rate(cortex_frontend_query_range_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", method=\"split_by_interval_and_results_cache\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": 
"splitting rate", - "legendLink": null - } - ], - "title": "Intervals per query", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "# Query the new metric introduced in Mimir 2.10.\n(\n sum by(request_type) (rate(cortex_frontend_query_result_cache_hits_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))\n /\n sum by(request_type) (rate(cortex_frontend_query_result_cache_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))\n)\n# Otherwise fallback to the previous general-purpose metrics.\nor\n(\n label_replace(\n # Query metrics before and after dskit cache refactor.\n sum (\n rate(thanos_cache_memcached_hits_total{name=\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_hits_total{name=\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n )\n /\n sum (\n rate(thanos_cache_memcached_requests_total{name=~\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_requests_total{name=~\"frontend-cache\", cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n ),\n \"request_type\", \"query_range\", \"\", \"\")\n)\n", - "format": "time_series", - "legendFormat": "{{request_type}}", - "legendLink": null - } - ], - "title": "Query results cache hit ratio", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Query results cache skipped\nThe % of queries whose results could not be cached.\nIt is tracked for each split query when the splitting by interval is enabled.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_frontend_query_result_cache_skipped_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (reason) /\nignoring (reason) group_left sum(rate(cortex_frontend_query_result_cache_attempted_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "{{reason}}", - "legendLink": null - } - ], - "title": "Query results cache skipped", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend - query splitting and results cache", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Sharded queries ratio\nThe % of queries that 
have been successfully rewritten and executed in a shardable way.\nThis panel only takes into account the type of queries that are supported by query sharding (eg. range queries).\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_frontend_query_sharding_rewrites_succeeded_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) /\nsum(rate(cortex_frontend_query_sharding_rewrites_attempted_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "sharded queries ratio", - "legendLink": null - } - ], - "title": "Sharded queries ratio", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Number of sharded queries per query\nThe number of sharded queries that have been executed for a single input query. 
It only tracks queries that\nhave been successfully rewritten in a shardable way.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_frontend_sharded_queries_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_frontend_sharded_queries_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_frontend_sharded_queries_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_frontend_sharded_queries_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Number of sharded queries per query", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": 
false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend - query sharding", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:cortex_ingester_queried_series_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job:cortex_ingester_queried_series_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1 * sum(cluster_job:cortex_ingester_queried_series_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}) / sum(cluster_job:cortex_ingester_queried_series_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Series per query", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - 
"fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:cortex_ingester_queried_samples_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job:cortex_ingester_queried_samples_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1 * sum(cluster_job:cortex_ingester_queried_samples_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}) / sum(cluster_job:cortex_ingester_queried_samples_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Samples per query", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "nullPointMode": "null as zero", - "options": 
{ - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:cortex_ingester_queried_exemplars_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job:cortex_ingester_queried_exemplars_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1 * sum(cluster_job:cortex_ingester_queried_exemplars_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}) / sum(cluster_job:cortex_ingester_queried_exemplars_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Exemplars per query", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - 
"expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_querier_storegateway_instances_hit_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_instances_hit_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Number of store-gateways hit per query", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": 
"histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_querier_storegateway_refetches_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_refetches_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Refetches of missing blocks per query", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Consistency checks failed\nRate of queries that had to run with consistency checks and those checks failed. 
A failed consistency check means that some of at least one block which had to be queried wasn't present in any of the store-gateways.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Failure Rate" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_querier_blocks_consistency_checks_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) / sum(rate(cortex_querier_blocks_consistency_checks_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Failure Rate", - "legendLink": null - } - ], - "title": "Consistency checks failed", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Rejected queries\nThe proportion of all queries received by queriers that were rejected for some reason.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - 
"tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_querier_queries_rejected_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) / ignoring (reason) group_left sum(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_query(_range)?\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{reason}}", - "legendLink": null - } - ], - "title": "Rejected queries", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max(cortex_bucket_index_loaded{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"})", - "format": "time_series", - "legendFormat": "Max", - "legendLink": null - }, - { - "expr": "min(cortex_bucket_index_loaded{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"})", - "format": "time_series", - "legendFormat": "Min", - "legendLink": null - }, - { - "expr": "avg(cortex_bucket_index_loaded{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"})", - "format": 
"time_series", - "legendFormat": "Average", - "legendLink": null - } - ], - "title": "Bucket indexes loaded (per querier)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_bucket_index_loads_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) - sum(rate(cortex_bucket_index_load_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_bucket_index_load_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Bucket indexes load / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - 
"spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_bucket_index_load_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_bucket_index_load_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_bucket_index_load_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_bucket_index_load_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Bucket indexes load latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": 
{ - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_bucket_store_series_blocks_queried_sum{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "blocks", - "legendLink": null - } - ], - "title": "Blocks queried / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "binBps" - }, - "overrides": [ ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(data_type) (\n # Exclude \"chunks refetched\".\n rate(cortex_bucket_store_series_data_size_fetched_bytes_sum{component=\"store-gateway\", stage!=\"refetched\", cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "{{data_type}}", - "legendLink": null - } - ], - "title": "Data fetched / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 
100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "binBps" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(data_type) (\n # Exclude \"chunks processed\" to only count \"chunks returned\", other than postings and series.\n rate(cortex_bucket_store_series_data_size_touched_bytes_sum{component=\"store-gateway\", stage!=\"processed\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "{{data_type}}", - "legendLink": null - } - ], - "title": "Data touched / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Store-gateway", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 26, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(stage) (rate(cortex_bucket_store_series_request_stage_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum by(stage) 
(rate(cortex_bucket_store_series_request_stage_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "{{stage}}", - "legendLink": null - } - ], - "title": "Series request average latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 27, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by(stage, le) (rate(cortex_bucket_store_series_request_stage_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])))\n", - "format": "time_series", - "legendFormat": "{{stage}}", - "legendLink": null - } - ], - "title": "Series request 99th percentile latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Series batch preloading efficiency\nThis panel shows the % of time reduced by preloading, for Series() requests which have been\nsplit to 2+ batches. 
If a Series() request is served within a single batch, then preloading\nis not triggered, and thus not counted in this measurement.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 28, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "# Clamping min to 0 because if preloading not useful at all, then the actual value we get is\n# slightly negative because of the small overhead introduced by preloading.\nclamp_min(1 - (\n sum(rate(cortex_bucket_store_series_batch_preloading_wait_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\n sum(rate(cortex_bucket_store_series_batch_preloading_load_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n), 0)\n", - "format": "time_series", - "legendFormat": "% of time reduced by preloading", - "legendLink": null - } - ], - "title": "Series batch preloading efficiency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Blocks currently owned\nThis panel shows the number of blocks owned by each store-gateway replica.\nFor each owned block, the store-gateway keeps its index-header on disk, and\neventually loaded in memory (if index-header lazy loading is disabled, or lazy loading\nis 
enabled and the index-header was loaded).\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 29, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "cortex_bucket_store_blocks_loaded{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Blocks currently owned", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 30, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_bucket_store_block_loads_total{component=\"store-gateway\",cluster=~\"$cluster\", 
job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) - sum(rate(cortex_bucket_store_block_load_failures_total{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_bucket_store_block_load_failures_total{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Blocks loaded / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 31, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_bucket_store_block_drops_total{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) - sum(rate(cortex_bucket_store_block_drop_failures_total{component=\"store-gateway\",cluster=~\"$cluster\", 
job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_bucket_store_block_drop_failures_total{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Blocks dropped / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 32, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "cortex_bucket_store_indexheader_lazy_load_total{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"} - cortex_bucket_store_indexheader_lazy_unload_total{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Lazy loaded index-headers", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - 
"mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 33, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Index-header lazy load duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Index-header lazy load gate latency\nTime spent waiting for a turn to load an index header. 
This time is not included in \"Index-header lazy load duration.\"\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 34, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_bucket_stores_gate_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_bucket_stores_gate_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_bucket_stores_gate_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_bucket_stores_gate_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Index-header lazy load gate latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": 
"short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 35, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_bucket_store_series_hash_cache_hits_total{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum(rate(cortex_bucket_store_series_hash_cache_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "hit ratio", - "legendLink": null - } - ], - "title": "Series hash cache hit ratio", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 36, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": 
"sum(rate(thanos_store_index_cache_hits_total{item_type=\"ExpandedPostings\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum(rate(thanos_store_index_cache_requests_total{item_type=\"ExpandedPostings\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "hit ratio", - "legendLink": null - } - ], - "title": "ExpandedPostings cache hit ratio", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 37, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_cache_memory_hits_total{name=\"chunks-attributes-cache\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum(rate(cortex_cache_memory_requests_total{name=\"chunks-attributes-cache\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "hit ratio", - "legendLink": null - } - ], - "title": "Chunks attributes in-memory cache hit ratio", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": 
"default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Queries", - "uid": "b3abe8d5c040395cc36615cb4334c92d", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-queries.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-reads-networking.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - 
"links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": 
true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Summary", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": 
"{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"})", - "format": "time_series", - "legendFormat": "highest", - 
"legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": 
"Bps" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - 
"span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler", - 
"titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - 
"fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": 
"min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) 
(rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - 
"expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Store-gateway", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": 
"never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": 
"never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ruler", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - 
"allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Reads networking", - "uid": "54b2a0a4748b3bd1aefa92ce5559a1c2", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-reads-networking.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-reads-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - 
"steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": 
"multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Summary", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - 
"legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - 
"legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - 
"matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - 
"overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - 
"showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - 
"legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - 
"expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - 
"showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": 
"min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (RSS)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"ingester\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Rules", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", 
- "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ruler", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - 
"unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - 
"showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - 
{ - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Store-gateway", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - 
"unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (RSS)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - 
}, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"store-gateway\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 26, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"store-gateway\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk reads", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { 
- "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 27, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(store-gateway).*\"\n }\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": 
false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Reads resources", - "uid": "cc86fd5aa9301c6528986572ad974db9", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-reads-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-reads.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "175px", - "panels": [ - { - "content": "

\n This dashboard shows health metrics for the read path.\n It is broken into sections for each service on the read path, and organized by the order in which the read request flows.\n
\n Incoming queries travel from the gateway → query frontend → query scheduler → querier → ingester and/or store-gateway (depending on the time range of the query).\n
\n For each service, there are 3 panels showing (1) requests per second to that service, (2) average, median, and p99 latency of requests to that service, and (3) p99 latency of requests to each instance of that service.\n

\n

\n The dashboard also shows metrics for the 4 optional caches that can be deployed:\n the query results cache, the metadata cache, the chunks cache, and the index cache.\n
\n These panels will show “no data” if the caches are not deployed.\n

\n

\n Lastly, it also includes metrics for how the ingester and store-gateway interact with object storage.\n

\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 12, - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Reads dashboard description", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "100px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Instant queries per second\nRate of instant queries per second being made to the system.\nIncludes both queries made to the /prometheus API as\nwell as queries from the ruler.\n\n", - "fill": 1, - "format": "reqps", - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(\n rate(\n cortex_request_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",\n route=~\"(prometheus|api_prom)_api_v1_query\"\n }[$__rate_interval]\n )\n or\n rate(\n cortex_prometheus_rule_evaluations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Instant queries / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - 
"label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Range queries per second\nRate of range queries per second being made to\nMimir via the /prometheus API.\n\n", - "fill": 1, - "format": "reqps", - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_query_range\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Range queries / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### \"Label names\" queries per second\nRate of \"label names\" endpoint queries per second being made to\nMimir 
via the /prometheus API.\n\n", - "fill": 1, - "format": "reqps", - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_labels\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Label names queries / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### \"Label values\" queries per second\nRate of specific \"label values\" endpoint queries per second being made to\nMimir via the /prometheus API.\n\n", - "fill": 1, - "format": "reqps", - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - 
"seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_label_name_values\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Label values queries / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Series queries per second\nRate of series queries per second being made to\nMimir via the /prometheus API.\n\n", - "fill": 1, - "format": "reqps", - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_series\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": 
"70,80", - "timeFrom": null, - "timeShift": null, - "title": "Series queries / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - 
"matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { 
- "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Requests / sec\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 10, - "links": [ ], - "options": { - 
"legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Latency (Time in Queue)\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (Time in Queue)", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Queue length\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "queries" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "sum(min_over_time(cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__interval]))", - "format": "time_series", - "legendFormat": "Queue length", - "legendLink": null - } - ], - "title": "Queue length", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### 99th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "99th Percentile: {{ additional_queue_dimensions }}", - "refId": "A" - } - ], - "title": "99th Percentile Latency by Queue Dimension", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### 50th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "50th Percentile: {{ additional_queue_dimensions }}", - "refId": "A" - } - ], - "title": "50th Percentile Latency by Queue Dimension", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Average Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (additional_queue_dimensions) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (additional_queue_dimensions), \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "Average: {{ additional_queue_dimensions }}", - "refId": "C" - } - ], - "title": "Average Latency by Queue Dimension", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler Latency (Time in Queue) Breakout by Additional Queue Dimensions", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - 
"tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum (\n rate(thanos_memcached_operations_total{name=\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{name=\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "Requests/s", - "legendLink": null - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Cache – query results", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": 
"absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_querier_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / 
sum(cluster_job_route:cortex_querier_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_querier_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 
* sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - }, - { - 
"collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": 
"fixed" - } - } - ] - } - ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",route=~\"/gatewaypb.StoreGateway/.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - 
"refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 26, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Store-gateway", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - 
"stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 27, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n 
label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 28, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval])) * 1e3 / 
sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Store-gateway – key-value store for store-gateways ring", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 29, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(operation) (\n # Backwards compatibility\n rate(\n thanos_memcached_operations_total{\n component=\"store-gateway\",\n name=\"index-cache\",\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n or ignoring(backend)\n rate(\n thanos_cache_operations_total{\n component=\"store-gateway\",\n name=\"index-cache\",\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": 
"Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 30, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - 
"intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (getmulti)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Hit ratio\nEven if you do not set up memcached for the blocks index cache, you will still see data in this panel because the store-gateway by default has an\nin-memory blocks index cache.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 31, - 
"links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(item_type) (\n rate(\n thanos_store_index_cache_hits_total{\n component=\"store-gateway\",\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n/\nsum by(item_type) (\n rate(\n thanos_store_index_cache_requests_total{\n component=\"store-gateway\",\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n", - "format": "time_series", - "legendFormat": "{{item_type}}", - "legendLink": null - } - ], - "title": "Hit ratio", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memcached – block index cache (store-gateway accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 32, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(operation) (\n # Backwards compatibility\n rate(thanos_memcached_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{\n cluster=~\"$cluster\", 
job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 33, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n 
cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (getmulti)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - 
}, - "overrides": [ ] - }, - "id": 34, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_cache_memcached_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n)\n/\nsum(\n # Backwards compatibility\n rate(thanos_cache_memcached_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "items", - "legendLink": null - } - ], - "title": "Hit ratio", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memcached – chunks cache (store-gateway accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 35, - "links": [ ], - "options": { - "legend": { - "showLegend": 
true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(operation) (\n # Backwards compatibility\n rate(thanos_memcached_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 36, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - 
"intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n)\n", - "format": "time_series", - 
"intervalFactor": 2, - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (getmulti)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 37, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_cache_memcached_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n/\nsum(\n # Backwards compatibility\n rate(thanos_cache_memcached_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "items", - "legendLink": null - } - ], - "title": "Hit ratio", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memcached – metadata cache (store-gateway accesses)", - "titleSize": "h6" - }, - { - 
"collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 38, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(operation) (\n # Backwards compatibility\n rate(thanos_memcached_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 39, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", 
job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n 
component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (getmulti)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 40, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_cache_memcached_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n/\nsum(\n # Backwards compatibility\n rate(thanos_cache_memcached_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - 
"legendFormat": "items", - "legendLink": null - } - ], - "title": "Hit ratio", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memcached – metadata cache (querier accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 41, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 42, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - 
], - "title": "Error rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 43, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - 
"label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 44, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": 
"short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Blocks object store (store-gateway accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 45, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - 
"legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Get", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 46, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval]))", - "format": 
"time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: GetRange", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 47, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval]))", - 
"format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Upload", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 48, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 49, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 50, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"querier\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Error rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 51, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 52, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Blocks object store (querier accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 53, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Get", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 54, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: GetRange", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 55, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Upload", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 56, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - 
"regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Reads", - "uid": "e327503188913dc38ad571c647eef643", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-reads.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-remote-ruler-reads-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": 
"dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": 
"custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"ruler-query-frontend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend (dedicated to ruler)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"} / 
container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler (dedicated to ruler)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": 
"request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": 
"desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier (dedicated to ruler)", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - 
}, - "timezone": "utc", - "title": "Mimir / Remote ruler reads resources", - "uid": "1940f6ef765a506a171faa2056c956c3", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-remote-ruler-reads-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-remote-ruler-reads.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "175px", - "panels": [ - { - "content": "

\n This dashboard shows health metrics for the ruler read path when remote operational mode is enabled.\n It is broken into sections for each service on the ruler read path, and organized by the order in which the read request flows.\n
\n For each service, there are three panels showing (1) requests per second to that service, (2) average, median, and p99 latency of requests to that service, and (3) p99 latency of requests to each instance of that service.\n

\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 12, - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Remote ruler reads dashboard description", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "100px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Evaluations per second\nRate of rule expressions evaluated per second.\n\n", - "fill": 1, - "format": "reqps", - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(\n rate(\n cortex_request_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\",\n route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"\n }[$__rate_interval]\n )\n)\n", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Evaluations / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": 
false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": 
"color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th 
percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend (dedicated to ruler)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Requests / sec\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 6, - "links": [ ], - "options": { - 
"legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Latency (Time in Queue)\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (Time in Queue)", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Queue length\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "queries" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "sum(min_over_time(cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__interval]))", - "format": "time_series", - "legendFormat": "Queue length", - "legendLink": null - } - ], - "title": "Queue length", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler (dedicated to ruler)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### 99th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "99th Percentile: {{ additional_queue_dimensions }}", - "refId": "A" - } - ], - "title": "99th Percentile Latency by Queue Dimension", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### 50th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "50th Percentile: {{ additional_queue_dimensions }}", - "refId": "A" - } - ], - "title": "50th Percentile Latency by Queue Dimension", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Average Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (additional_queue_dimensions) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (additional_queue_dimensions), \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "Average: {{ additional_queue_dimensions }}", - "refId": "C" - } - ], - "title": "Average Latency by Queue Dimension", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler Latency (Time in Queue) Breakout by Additional Queue Dimensions", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - 
"value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", 
\"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_querier_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / sum(cluster_job_route:cortex_querier_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - 
"fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_querier_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier (dedicated to ruler)", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - 
"datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Remote ruler reads", - "uid": "f103238f7f5ab2f1345ce650cbfbfe2f", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-remote-ruler-reads.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-rollout-progress.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "fillOpacity": 80, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineWidth": 1, - "scaleDistribution": { - "type": "linear" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": 
[ ], - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Ready" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Updated" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "blue", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 13, - "w": 10, - "x": 0, - "y": 0 - }, - "id": 1, - "links": [ ], - "options": { - "barRadius": 0, - "barWidth": 0.96999999999999997, - "fullHighlight": false, - "groupWidth": 0.69999999999999996, - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "orientation": "horizontal", - "showValue": "auto", - "stacking": "none", - "tooltip": { - "mode": "multi", - "sort": "none" - }, - "xField": "Workload", - "xTickLabelRotation": 0, - "xTickLabelSpacing": 0 - }, - "targets": [ - { - "expr": "(\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas_updated{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas_updated{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n /\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n) and (\n sum by (workload) (\n label_replace(label_replace(label_replace(\n 
kube_deployment_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n > 0\n)\n", - "format": "table", - "instant": true, - "intervalFactor": null, - "legendFormat": "__auto", - "legendLink": null, - "step": null - }, - { - "expr": "(\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas_ready{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas_ready{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n /\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n) and (\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n > 0\n)\n", - "format": "table", - "instant": true, - "intervalFactor": null, - "legendFormat": "__auto", - "legendLink": null, - "step": null - } - ], - "title": "Rollout progress", - "transformations": [ - { - "id": "joinByField", 
- "options": { - "byField": "workload", - "mode": "outer" - } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time 1": true, - "Time 2": true - }, - "renameByName": { - "Value #A": "Updated", - "Value #B": "Ready", - "workload": "Workload" - } - } - }, - { - "id": "sortBy", - "options": { - "sort": [ - { - "field": "Workload" - } - ] - } - } - ], - "type": "barchart" - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 10, - "y": 0 - }, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Writes - 2xx", - "tooltip": { - 
"shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 0.20000000000000001 - }, - { - "color": "red", - "value": 0.5 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 12, - "y": 0 - }, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - 
"step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Writes - 4xx", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 0.01 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 14, - "y": 0 - }, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - 
"intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Writes - 5xx", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 0.20000000000000001 - }, - { - "color": "red", - "value": 0.5 - } - ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 8, - "x": 16, - "y": 0 - }, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - 
"step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Writes 99th latency", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 10, - "y": 4 - }, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": 
"", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Reads - 2xx", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 0.01 - }, - { - "color": "red", - "value": 0.050000000000000003 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 12, - "y": 4 - }, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", 
route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Reads - 4xx", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 0.01 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 14, - "y": 4 - }, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Reads - 5xx", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 1 - }, - { - "color": "red", - "value": 2.5 - } - ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 8, - "x": 16, - "y": 4 - }, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", 
route=~\"(prometheus|api_prom)_api_v1_.+\"}))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Reads 99th latency", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 0, - "noValue": "All healthy", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 1 - }, - { - "color": "red", - "value": 2 - } - ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 3, - "w": 10, - "x": 0, - "y": 13 - }, - "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "text": { - "titleSize": 14, - "valueSize": 14 - }, - "textMode": "value_and_name" - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "kube_deployment_status_replicas_unavailable{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n> 0\n", - "format": null, - "instant": true, - "interval": "", - 
"intervalFactor": null, - "legendFormat": "{{deployment}}", - "legendLink": null, - "step": null - }, - { - "expr": "kube_statefulset_status_replicas_current{cluster=~\"$cluster\", namespace=~\"$namespace\"} -\nkube_statefulset_status_replicas_ready {cluster=~\"$cluster\", namespace=~\"$namespace\"}\n> 0\n", - "format": null, - "instant": true, - "interval": "", - "intervalFactor": null, - "legendFormat": "{{statefulset}}", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Unhealthy pods", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "r.*" - }, - "properties": [ - { - "id": "custom.align", - "value": "center" - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 6, - "x": 10, - "y": 8 - }, - "id": 11, - "targets": [ - { - "expr": "count by(container, version) (\n label_replace(\n kube_pod_container_info{cluster=~\"$cluster\", namespace=~\"$namespace\"},\n \"version\", \"$1\", \"image\", \".*:(.*)\"\n )\n)\n", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "title": "Pods count per version", - "transformations": [ - { - "id": "labelsToFields", - "options": { - "valueLabel": "version" - } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true - }, - "indexByName": { - "Time": 0, - "container": 1 - } - } - }, - { - "id": "sortBy", - "options": { - "fields": { }, - "sort": [ - { - "field": "container" - } - ] - } - } - ], - "type": "table" - }, - { - 
"datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 10 - }, - "unit": "percentunit" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 8 - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}))[1h:])\n)\n", - "format": "time_series", - "legendFormat": "writes", - "legendLink": null - }, - { - "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}))[1h:])\n)\n", - "format": "time_series", - "legendFormat": "reads", - "legendLink": null - } - ], - "title": "Latency vs 24h ago", - "type": "timeseries" - } - ], - "refresh": "10s", - "rows": null, - "schemaVersion": 27, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - 
"options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Rollout progress", - "uid": "7f0b5567d543a1698e695b530eb7f5de", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-rollout-progress.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-ruler.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ 
- "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "100px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cortex_ruler_managers_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Active configurations", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - 
"seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Total rules", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Read from ingesters - QPS\nNote: Even while operating in Remote ruler mode you will still see values for this panel.\n\nThis is because the metrics are inclusive of intermediate services and are showing the requests that ultimately reach the ingesters.\n\nFor a more detailed view of the read path when using remote ruler mode, see the Remote ruler reads dashboard.\n\n", - "fill": 1, - "format": "reqps", - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Read from ingesters - QPS", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "reqps", - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Write to ingesters - QPS", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - 
"label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "success", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - }, - 
{ - "expr": "sum(rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "missed", - "legendLink": null - } - ], - "title": "Evaluations per second", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum (rate(cortex_prometheus_rule_evaluation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum (rate(cortex_prometheus_rule_evaluation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "average", - "legendLink": null - } - ], - "title": "Latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Rule evaluations global", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - 
"matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", 
operation=\"/cortex.Ingester/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "QPS", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", 
operation=\"/cortex.Ingester/Push\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Writes (ingesters)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { 
- "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "QPS", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, 
sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Reads (ingesters)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - 
"mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n 
label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ruler - key-value store for rulers ring", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(cortex_querier_storegateway_instances_hit_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_instances_hit_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Number of store-gateways hit per query", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(cortex_querier_storegateway_refetches_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_refetches_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Refetches of missing blocks per query", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Consistency checks failed\nRate of queries that had to run with consistency checks and those checks failed. A failed consistency check means that some of at least one block which had to be queried wasn't present in any of the store-gateways.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Failures / sec" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_querier_blocks_consistency_checks_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) / 
sum(rate(cortex_querier_blocks_consistency_checks_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Failures / sec", - "legendLink": null - } - ], - "title": "Consistency checks failed", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ruler - blocks storage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(user) (rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum by(user) (rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]) > 0)\n> 0\n", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Delivery errors", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": 
"percentunit" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(user) (rate(cortex_prometheus_notifications_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum by(user) (rate(cortex_prometheus_notifications_queue_capacity{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0\n", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Queue length", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (user) (increase(cortex_prometheus_notifications_dropped_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0\n", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Dropped", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Notifications", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - 
"pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(user) (rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Missed iterations", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "rate(cortex_prometheus_rule_group_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])\n /\nrate(cortex_prometheus_rule_group_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])\n", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": 
false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(rule_group) (rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "{{ rule_group }}", - "legendLink": null - } - ], - "title": "Failures", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Group evaluations", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "sum by(user) (rate(cortex_prometheus_rule_evaluation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum by(user) (rate(cortex_prometheus_rule_evaluation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Latency", - "type": "timeseries" - } - ], - 
"repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Rule evaluation per user", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Error rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - 
"custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - 
"fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 26, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - 
"repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ruler configuration object store (ruler accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 27, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Get", - "type": "timeseries", - "yaxes": [ - { - 
"format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 28, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: GetRange", - "type": 
"timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 29, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Upload", - 
"type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 30, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: 
Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Ruler", - 
"uid": "631e15d5d85afb2ca8e35d62984eeaa0", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-ruler.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-scaling.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "200px", - "panels": [ - { - "id": 1, - "options": { - "content": "This dashboard identifies scaling-related issues by suggesting services that you might want to scale up.\nThe table that follows contains a suggested number of replicas and the reason why.\nIf the system is failing and depending on the reason, try scaling up to the specified number.\nThe specified numbers are intended as helpful guidelines when things go wrong, rather than prescriptive guidelines.\n\nReasons:\n- **sample_rate**: There are not enough replicas to handle the\n sample rate. Applies to distributor and ingesters.\n- **active_series**: There are not enough replicas\n to handle the number of active series. Applies to ingesters.\n- **cpu_usage**: There are not enough replicas\n based on the CPU usage of the jobs vs the resource requests.\n Applies to all jobs.\n- **memory_usage**: There are not enough replicas based on the memory\n usage vs the resource requests. 
Applies to all jobs.\n- **active_series_limits**: There are not enough replicas to hold 60% of the\n sum of all the per tenant series limits.\n- **sample_rate_limits**: There are not enough replicas to handle 60% of the\n sum of all the per tenant rate limits.\n", - "mode": "markdown" - }, - "span": 12, - "title": "", - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Service scaling", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "400px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 0, - "desc": false - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "Required Replicas", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "Cluster", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "__name__", - "thresholds": [ ], - "type": "hidden", - "unit": "short" - }, - { - "alias": "Cluster", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - 
"link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "cluster", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "Service", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "deployment", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "Namespace", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "namespace", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "Reason", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "reason", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "sort_desc(\n cluster_namespace_deployment_reason:required_replicas:count{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n > ignoring(reason) group_left\n cluster_namespace_deployment:actual_replicas:count{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\n", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Workload-based scaling", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - 
"yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Scaling", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Scaling", - "uid": 
"64bbad83507b7289b514725658e10352", - "version": 0 - } -kind: ConfigMap + memcached-address: bWVtY2FjaGVkLm1lbWNhY2hlZC1zeXN0ZW0uc3ZjLmNsdXN0ZXIubG9jYWw6MTEyMTE= +kind: Secret metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-scaling.json + name: integrations-memcached namespace: monitoring-system +type: Opaque --- apiVersion: v1 data: - mimir-slow-queries.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": "quantile_over_time(0.5, 
{cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Response time", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Fetched series", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - 
"spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Fetched chunks", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > 
${min_duration} | unwrap response_size_bytes[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Response size", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Time span", - "type": "timeseries" - }, - { - "datasource": 
"${loki_datasource}", - "description": "### Query wall time\nSeconds per second spent by queriers evaluating queries.\nThis is roughly the product of the number of subqueries for a query and how long they took.\nIn increase in this metric means that queries take more resources from the query path to evaluate.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap query_wall_time_seconds [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap query_wall_time_seconds [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Query wall time", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Accross tenants", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - 
"fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 response time", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 fetched series", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - 
"fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 fetched chunks", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 response size", - "type": 
"timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 time span", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "description": "### Query wall time\nSeconds per second spent by queriers evaluating queries.\nThis is roughly the product of the number of subqueries for a query and how long they took.\nIn increase in this metric means that queries take more resources from the query path to evaluate.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, 
{cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap query_wall_time_seconds [$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 query wall time", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Top 10 tenants", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 response time", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": 
"absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 fetched series", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 fetched chunks", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - 
"stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 response size", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 time span", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "description": "### Query wall time\nSeconds per second spent by queriers evaluating 
queries.\nThis is roughly the product of the number of subqueries for a query and how long they took.\nIn increase in this metric means that queries take more resources from the query path to evaluate.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap query_wall_time_seconds [$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 query wall time", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Top 10 User-Agents", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "fetched_chunk_bytes" - }, - "properties": [ - { - "id": "unit", - "value": "bytes" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "fetched_index_bytes" - }, - "properties": [ - { - "id": "unit", - "value": "bytes" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "response_size_bytes" - }, - "properties": [ - { - "id": "unit", - "value": "bytes" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": 
"results_cache_hit_bytes" - }, - "properties": [ - { - "id": "unit", - "value": "bytes" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "results_cache_miss_bytes" - }, - "properties": [ - { - "id": "unit", - "value": "bytes" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "estimated_series_count" - }, - "properties": [ - { - "id": "unit", - "value": "short" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "fetched_chunks_count" - }, - "properties": [ - { - "id": "unit", - "value": "short" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "fetched_series_count" - }, - "properties": [ - { - "id": "unit", - "value": "short" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Time span" - }, - "properties": [ - { - "id": "unit", - "value": "s" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Duration" - }, - "properties": [ - { - "id": "unit", - "value": "s" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Step" - }, - "properties": [ - { - "id": "unit", - "value": "s" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "queue_time_seconds" - }, - "properties": [ - { - "id": "unit", - "value": "s" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "query_wall_time_seconds" - }, - "properties": [ - { - "id": "unit", - "value": "s" - } - ] - } - ] - }, - "height": "500px", - "id": 19, - "span": 12, - "targets": [ - { - "expr": "{cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | label_format response_time_seconds=\"{{ if .response_time }} {{ duration .response_time }} {{ end }}\",param_step_seconds=\"{{ if .param_step }} {{ div .param_step 1000 }} {{ end }}\",length_seconds=\"{{ if .length }} {{ duration .length }} {{ end }}\"", - "instant": false, - "legendFormat": "", - "range": true, - 
"refId": "A" - } - ], - "title": "Slow queries", - "transformations": [ - { - "id": "extractFields", - "options": { - "source": "labels" - } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Line": true, - "Time": true, - "caller": true, - "cluster": true, - "component": true, - "container": true, - "gossip_ring_member": true, - "host": true, - "id": true, - "job": true, - "labels": true, - "length": true, - "level": true, - "line": true, - "method": true, - "msg": true, - "name": true, - "namespace": true, - "param_step": true, - "path": true, - "pod": true, - "pod_template_hash": true, - "response_time": true, - "stream": true, - "traceID": true, - "tsNs": true - }, - "indexByName": { - "err": 10, - "length_seconds": 3, - "param_end": 5, - "param_query": 8, - "param_start": 4, - "param_step_seconds": 7, - "param_time": 6, - "response_time_seconds": 9, - "status": 1, - "ts": 0, - "user": 2 - }, - "renameByName": { - "err": "Error", - "length_seconds": "Time span", - "param_end": "End", - "param_query": "Query", - "param_start": "Start", - "param_step_seconds": "Step", - "param_time": "Time (instant query)", - "response_time_seconds": "Duration", - "ts": "Completion date", - "user": "Tenant ID" - } - } - }, - { - "id": "convertFieldType", - "options": { - "conversions": [ - { - "destinationType": "number", - "targetField": "sharded_queries" - }, - { - "destinationType": "number", - "targetField": "split_queries" - }, - { - "destinationType": "number", - "targetField": "fetched_chunk_bytes" - }, - { - "destinationType": "number", - "targetField": "fetched_index_bytes" - }, - { - "destinationType": "number", - "targetField": "response_size_bytes" - }, - { - "destinationType": "number", - "targetField": "results_cache_hit_bytes" - }, - { - "destinationType": "number", - "targetField": "results_cache_miss_bytes" - }, - { - "destinationType": "number", - "targetField": "estimated_series_count" - }, - { - "destinationType": "number", - "targetField": 
"fetched_chunks_count" - }, - { - "destinationType": "number", - "targetField": "fetched_series_count" - }, - { - "destinationType": "number", - "targetField": "Time span" - }, - { - "destinationType": "number", - "targetField": "Duration" - }, - { - "destinationType": "number", - "targetField": "Step" - }, - { - "destinationType": "number", - "targetField": "queue_time_seconds" - }, - { - "destinationType": "number", - "targetField": "query_wall_time_seconds" - } - ] - } - } - ], - "type": "table" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "hide": 0, - "includeAll": false, - "label": "Loki data source", - "multi": false, - "name": "loki_datasource", - "query": "loki", - 
"type": "datasource" - }, - { - "current": { - "selected": true, - "text": "5s", - "value": "5s" - }, - "hide": 0, - "label": "Min duration", - "name": "min_duration", - "options": [ - { - "selected": true, - "text": "5s", - "value": "5s" - } - ], - "query": "5s", - "type": "textbox" - }, - { - "current": { - "selected": true, - "text": ".*", - "value": ".*" - }, - "hide": 0, - "label": "Tenant ID", - "name": "tenant_id", - "options": [ - { - "selected": true, - "text": ".*", - "value": ".*" - } - ], - "query": ".*", - "type": "textbox" - }, - { - "current": { - "selected": true, - "text": ".*", - "value": ".*" - }, - "hide": 0, - "label": "User-Agent HTTP Header", - "name": "user_agent", - "options": [ - { - "selected": true, - "text": ".*", - "value": ".*" - } - ], - "query": ".*", - "type": "textbox" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Slow queries", - "uid": "6089e1ce1e678788f46312a0a1e647e6", - "version": 0 - } -kind: ConfigMap + mysql-host: bXlzcWwubXlzcWwtc3lzdGVtLnN2Yy5jbHVzdGVyLmxvY2Fs + mysql-password: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= + mysql-username: bGd0bXA= +kind: Secret metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-slow-queries.json + name: integrations-mysql namespace: monitoring-system +type: Opaque --- apiVersion: v1 data: - mimir-tenants.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": 
true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "25px", - "panels": [ - { - "content": "

\n This dashboard shows various metrics detailed by tenant (user) selected above.\n

\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 12, - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Tenants dashboard description", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### All series\nNumber of active, in-memory, and owned series per user, and active series matching custom trackers (in parenthesis).\nNote that these counts include all series regardless of the type of data (counter, gauge, native histogram, etc.).\nNote that active series matching custom trackers are included in the total active series count.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(\n (\n cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n - cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "in-memory", - "legendLink": null - }, - { - "expr": "max(cortex_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"max_global_series_per_user\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"max_global_series_per_user\"})\n", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "sum(\n cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "active", - "legendLink": null - }, - { - "expr": "sum(\n cortex_ingester_owned_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "owned", - "legendLink": null - }, - { - "expr": "sum by (name) (\n cortex_ingester_active_series_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n) > 0\n", - "format": "time_series", - "legendFormat": "active ({{ name }})", - "legendLink": null - } - ], - "title": "All series", - 
"type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### In-memory series per ingester\nLocal tenant series limit and number of in-memory series per ingester.\nBecause series can be unevenly distributed across ingesters, ingesters may hit the local limit at different times.\nNote that in-memory series may exceed the local limit if limiting based on owned series is enabled.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/local limit .+/" - }, - "properties": [ - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - }, - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "min by (job) (cortex_ingester_local_limits{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", limit=\"max_global_series_per_user\", user=\"$user\"})\n", - "format": "time_series", - "legendFormat": "local limit ({{job}})", - "legendLink": null - }, - { - "expr": "cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n- cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "In-memory series per ingester", - "type": "timeseries" - }, - { - "datasource": 
"$datasource", - "description": "### Owned series per ingester\nLocal tenant series limit and number of owned series per ingester.\nBecause series can be unevenly distributed across ingesters, ingesters may hit the local limit at different times.\nOwned series are the subset of an ingester's in-memory series that currently map to it in the ring\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/local limit .+/" - }, - "properties": [ - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - }, - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "min by (job) (cortex_ingester_local_limits{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", limit=\"max_global_series_per_user\", user=\"$user\"})\n", - "format": "time_series", - "legendFormat": "local limit ({{job}})", - "legendLink": null - }, - { - "expr": "cortex_ingester_owned_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Owned series per ingester", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Tenant series counts", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - 
"description": "### Series with exemplars\nNumber of series with exemplars currently in storage.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "series", - "legendLink": null - } - ], - "title": "Series with exemplars", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Oldest exemplar age\nThe age of the oldest exemplar stored in circular storage.\nUseful to check for what time range the current exemplar buffer limit allows.\nThis usually means the max age for all exemplars for a typical setup.\nThis is not true though if one of the series timestamp is in future compared to rest series.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - 
"tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "time() - min(cortex_ingester_tsdb_exemplar_last_exemplars_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"} > 0)", - "format": "time_series", - "legendFormat": "age", - "legendLink": null - } - ], - "title": "Oldest exemplar age", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Native histogram series\nNumber of active native histogram series per user, and active native histogram series matching custom trackers (in parenthesis).\nNote that active series matching custom trackers are included in the total active series count.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n cortex_ingester_active_native_histogram_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "active", - "legendLink": null - }, - { - "expr": "sum by (name) (\n 
cortex_ingester_active_native_histogram_series_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n) > 0\n", - "format": "time_series", - "legendFormat": "active ({{ name }})", - "legendLink": null - } - ], - "title": "Native histogram series", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Total number of buckets used by native histogram series\nTotal number of buckets in active native histogram series per user, and total active native histogram buckets matching custom trackers (in parenthesis).\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n cortex_ingester_active_native_histogram_buckets{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "buckets", - 
"legendLink": null - }, - { - "expr": "sum by (name) (\n cortex_ingester_active_native_histogram_buckets_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n) > 0\n", - "format": "time_series", - "legendFormat": "buckets ({{ name }})", - "legendLink": null - } - ], - "title": "Total number of buckets used by native histogram series", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Exemplars and native histograms", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Distributor requests incoming rate\nThe rate of requests that have come in to the distributor, including rejected requests.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_requests_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Distributor requests incoming rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor requests 
received (accepted) rate\nThe rate of received requests, excluding rejected requests.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_received_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - }, - { - "expr": "max(cortex_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"request_rate\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"request_rate\"})\n", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "Distributor requests received (accepted) rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Newest seen sample age\nThe age of the newest received sample seen in the distributors.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - 
"thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "time() - max(cortex_distributor_latest_seen_sample_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"} > 0)", - "format": "time_series", - "legendFormat": "age", - "legendLink": null - } - ], - "title": "Newest seen sample age", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor discarded requests rate\nThe rate of each request's discarding reason.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_discarded_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{ reason }}", - "legendLink": null - } - ], - "title": "Distributor discarded requests rate", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor ingestion requests", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Distributor samples incoming rate\nThe rate of samples that 
have come in to the distributor, including rejected or deduped exemplars.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_samples_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Distributor samples incoming rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor samples received (accepted) rate\nThe rate of received samples, excluding rejected and deduped samples.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - }, - { - "expr": "max(cortex_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"ingestion_rate\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"ingestion_rate\"})\n", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "Distributor samples received (accepted) rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor deduplicated/non-HA\nThe rate of deduplicated samples and the rate of received samples for a user that has HA tracking turned on, but the sample didn't contain both HA labels.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_deduped_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "deduplicated", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_distributor_non_ha_samples_received_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "non-HA", - 
"legendLink": null - } - ], - "title": "Distributor deduplicated/non-HA", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor and ingester discarded samples rate\nThe rate of each sample's discarding reason.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{ reason }} (distributor)", - "legendLink": null - }, - { - "expr": "sum by (reason) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{ reason }} (ingester)", - "legendLink": null - } - ], - "title": "Distributor and ingester discarded samples rate", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Samples ingestion funnel", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Distributor exemplars incoming rate\nThe rate of exemplars that have come in to the distributor, including rejected or deduped exemplars.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - 
"pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_exemplars_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Distributor exemplars incoming rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor exemplars received (accepted) rate\nThe rate of received exemplars, excluding rejected and deduped exemplars.\nThis number can be sensibly lower than incoming rate because we dedupe the HA sent exemplars, and then reject based on time.\nSee discarded rate for reasons why exemplars are being discarded.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_received_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Distributor exemplars received 
(accepted) rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor discarded exemplars rate\nThe rate of each exmplars' discarding reason.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_discarded_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{ reason }}", - "legendLink": null - } - ], - "title": "Distributor discarded exemplars rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Ingester appended exemplars rate\nTotal number of exemplars appended in the ingesters.\nThis can be lower than ingested exemplars rate since TSDB does not append the same exemplar twice, and those can be frequent.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n rate(cortex_ingester_tsdb_exemplar_exemplars_appended_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval])\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Ingester appended exemplars rate", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Exemplars ingestion funnel", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Symbol table size for loaded blocks\nSize of symbol table in memory for loaded blocks, averaged by ingester.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (job) (cortex_ingester_tsdb_symbol_table_size_bytes{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "{{ job }}", - "legendLink": null - } - ], - "title": "Symbol table size for loaded blocks", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Space used by local blocks\nThe number of bytes that are currently used for local storage by all blocks.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 
1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (job) (cortex_ingester_tsdb_storage_blocks_bytes{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "{{ job }}", - "legendLink": null - } - ], - "title": "Space used by local blocks", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingesters' storage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Number of groups\nTotal number of rule groups for a tenant.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "count(sum by (rule_group) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", 
user=\"$user\"}))", - "format": "time_series", - "legendFormat": "groups", - "legendLink": null - }, - { - "expr": "max(cortex_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"ruler_max_rule_groups_per_tenant\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"ruler_max_rule_groups_per_tenant\"})\n", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "Number of groups", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Number of rules\nTotal number of rules for a tenant.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "rules", - "legendLink": null - } - ], - "title": "Number of rules", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "options": { - "legend": { - 
"showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Total evaluations rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 26, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (rule_group) (rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "{{ rule_group }}", - "legendLink": null - } - ], - "title": "Failed evaluations rate", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Rules", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 27, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - 
"points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "rules", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (rule_group) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit biggest groups", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 28, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - 
"nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "seconds", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (rule_group) (cortex_prometheus_rule_group_last_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit slowest groups (last evaluation)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Top rules", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": 
"$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 29, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Sent notifications rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "rate" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 30, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Failed notifications rate", - "type": "timeseries" - } - ], - "repeat": null, - 
"repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Notifications", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 31, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (user) (cortex_alertmanager_alerts{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "alerts", - "legendLink": null - }, - { - "expr": "sum by (user) (cortex_alertmanager_silences{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "silences", - "legendLink": null - } - ], - "title": "Alerts", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": 
"#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 32, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "(\nsum(rate(cortex_alertmanager_notifications_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))\n-\non() (sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) or on () vector(0))\n) > 0\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "NPS", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 33, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "(\nsum(rate(cortex_alertmanager_notifications_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration)\n-\n(sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", 
user=\"$user\"}[$__rate_interval])) by(integration) or\n (sum(rate(cortex_alertmanager_notifications_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration) * 0)\n)) > 0\n", - "format": "time_series", - "legendFormat": "success - {{ integration }}", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration)", - "format": "time_series", - "legendFormat": "failed - {{ integration }}", - "legendLink": null - } - ], - "title": "NPS by integration", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alertmanager", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 34, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_query_frontend_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Queries / Sec", - "legendLink": null - } - ], - "title": "Rate of Read Requests - query-frontend", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - 
"fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 35, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "Queue Length", - "legendLink": null - } - ], - "title": "Number of Queries Queued - query-scheduler", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Read Path - Queries (User)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 36, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_query_frontend_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Queries / Sec", - "legendLink": null - } - ], - "title": "Rate of Read Requests - ruler-query-frontend", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - 
"drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 37, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "Queue Length", - "legendLink": null - } - ], - "title": "Number of Queries Queued - ruler-query-scheduler", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Read Path - Queries (Ruler)", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Estimated Compaction Jobs\nEstimated number of compaction jobs for selected user, based on latest version of bucket index. When user sends data, ingesters upload new user blocks every 2 hours\n(shortly after 01:00 UTC, 03:00 UTC, 05:00 UTC, etc.), and compactors should process all of the blocks within 2h interval.\nIf this graph regularly goes to zero (or close to zero) in 2 hour intervals, then compaction for this user works correctly.\n\nDepending on the configuration, there are two types of jobs: `split` jobs and `merge` jobs. 
Split jobs will only show up when user is configured with positive number of `compactor_split_and_merge_shards`.\nValues for split and merge jobs are stacked.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 50, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 38, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (type) (cortex_bucket_index_estimated_compaction_jobs{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", user=\"$user\"})\nand ignoring(type)\n(sum(rate(cortex_bucket_index_estimated_compaction_jobs_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) == 0)\n", - "format": "time_series", - "legendFormat": "{{ job }}", - "legendLink": null - } - ], - "title": "Estimated Compaction Jobs", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Number of blocks\nNumber of blocks stored in long-term storage for this user.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 39, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by (user) (cortex_bucket_blocks_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", user=\"$user\"})\n", - "format": "time_series", - "legendFormat": "{{ job }}", - "legendLink": null - } - ], - "title": "Blocks", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Compactions", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "user", - "multi": false, - "name": "user", - "options": [ ], - "query": "label_values(cortex_ingester_active_series{cluster=~\"$cluster\", namespace=~\"$namespace\"}, user)", - "refresh": 1, - "regex": "", - "sort": 
1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "current": { - "selected": true, - "text": "10", - "value": "10" - }, - "hide": 0, - "includeAll": false, - "multi": false, - "name": "limit", - "options": [ - { - "selected": true, - "text": "10", - "value": "10" - }, - { - "selected": false, - "text": "50", - "value": "50" - }, - { - "selected": false, - "text": "100", - "value": "100" - }, - { - "selected": false, - "text": "500", - "value": "500" - }, - { - "selected": false, - "text": "1000", - "value": "1000" - } - ], - "type": "custom" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Tenants", - "uid": "35fa247ce651ba189debf33d7ae41611", - "version": 0 - } -kind: ConfigMap + redis-addr: cmVkaXMtbWFzdGVyLnJlZGlzLXN5c3RlbS5zdmMuY2x1c3Rlci5sb2NhbDo2Mzc5 + redis-password: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= +kind: Secret metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-tenants.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-top-tenants.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "25px", - "panels": [ - { - "content": "

\n This dashboard shows the top tenants based on multiple selection criterias.\n Rows are collapsed by default to avoid querying all of them.\n Use the templating variable \"limit\" above to select the amount of users to be shown.\n

\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 12, - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Top tenants dashboard description", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "series", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit,\n sum by (user) (\n cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n )\n)\n", - 
"format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by active series", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By active series", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "series", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - 
"pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} )\n)\n)", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by in-memory series (series created - series removed)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By in-memory series", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - 
}, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} )\n)\n\nand\ntopk($limit, sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ end())\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ end())\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} @ end())\n)\n - sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ start())\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ start())\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} @ start())\n)\n)\n", - 
"format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Top $limit users by in-memory series (series created - series removed) that grew the most between query range start and query range end", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By in-memory series growth", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "samples/s", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[5m])))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - 
"thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by received samples rate in last 5m", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By samples rate", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\nand\ntopk($limit,\n sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ end()))\n -\n sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ start()))\n)\n", - "format": "time_series", - "legendFormat": "{{ user }}", 
- "legendLink": null - } - ], - "title": "Top $limit users by received samples rate that grew the most between query range start and query range end", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By samples rate growth", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "samples/s", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[5m])))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - 
"title": "Top $limit users by discarded samples rate in last 5m", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By discarded samples rate", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\nand\ntopk($limit,\n sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ end()))\n -\n sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ start()))\n)\n", - "format": "time_series", - 
"legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Top $limit users by discarded samples rate that grew the most between query range start and query range end", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By discarded samples rate growth", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "series", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit,\n sum by (user) (\n cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n )\n)\n", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by series with exemplars", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By series with exemplars", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "exemplars/s", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": 
"", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (user) (rate(cortex_distributor_received_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[5m])))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by received exemplars rate in last 5m", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By exemplars rate", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 11, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 3, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - 
{ - "alias": "rules", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (rule_group, user) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit biggest groups", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By rule group size", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 12, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": 
"flot", - "seriesOverrides": [ ], - "sort": { - "col": 3, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "seconds", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (rule_group, user) (cortex_prometheus_rule_group_last_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit slowest groups (last evaluation)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By rule group evaluation time", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - 
"fill": 1, - "id": 13, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "Compaction Jobs", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit,\n sum by (user) (cortex_bucket_index_estimated_compaction_jobs{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"})\n and ignoring(user)\n (sum(rate(cortex_bucket_index_estimated_compaction_jobs_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) == 0)\n)\n", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by estimated compaction jobs from bucket-index", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - 
"format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By estimated compaction jobs from bucket-index", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "current": { - "selected": true, - "text": "10", - "value": "10" - }, - "hide": 0, - "includeAll": false, - "multi": false, - "name": "limit", - "options": [ - { - "selected": true, - "text": "10", - "value": "10" - }, - { - "selected": false, - "text": "50", - "value": "50" - }, - { - "selected": false, - "text": "100", - "value": "100" - } - 
], - "type": "custom" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Top tenants", - "uid": "bc6e12d4fe540e4a1785b9d3ca0ffdd9", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-top-tenants.json + name: integrations-redis namespace: monitoring-system +type: Opaque --- apiVersion: v1 data: - mimir-writes-networking.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - 
"expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Summary", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - 
"drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, 
- "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"})", - "format": "time_series", - "legendFormat": 
"limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": 
null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum 
by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", 
- "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Writes networking", - "uid": "978c1cb452585c96697a238eaac7fe2d", - "version": 0 - } -kind: ConfigMap + MIMIR_S3_SECRET_ACCESS_KEY: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= +kind: Secret metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-writes-networking.json + name: mimir-distributed-env-92ddctt858 namespace: monitoring-system +type: Opaque --- apiVersion: v1 -data: - mimir-writes-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": 
"{{pod}}", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Summary", - "titleSize": "h6" - }, - { - "collapse": false, - 
"height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": 
"CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - 
"defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (cortex_ingester_memory_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "In-memory series", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - 
"pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - }, - { - 
"collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (RSS)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { 
- "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - 
"showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"ingester\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": 
"timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"ingester\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk reads", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount 
by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(ingester).*\"\n }\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - 
"title": "Mimir / Writes resources", - "uid": "bc9160e50b52e89e0e49c840fea3d379", - "version": 0 - } -kind: ConfigMap +kind: Service metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir labels: - grafana_dashboard: "1" - name: mimir-writes-resources.json + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent namespace: monitoring-system +spec: + internalTrafficPolicy: Cluster + ports: + - name: http-metrics + port: 80 + protocol: TCP + targetPort: 80 + - name: grpc-otlp + port: 4317 + protocol: TCP + targetPort: 4317 + - name: http-otlp + port: 4318 + protocol: TCP + targetPort: 4318 + - name: zipkin + port: 9411 + protocol: TCP + targetPort: 9411 + - name: jaeger-compact + port: 6831 + protocol: UDP + targetPort: 6831 + selector: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + type: ClusterIP --- apiVersion: v1 -data: - mimir-writes.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "125px", - "panels": [ - { - "content": "

\n This dashboard shows various health metrics for the write path.\n It is broken into sections for each service on the write path,\n and organized by the order in which the write request flows.\n
\n Incoming metrics data travels from the gateway → distributor → ingester.\n
\n For each service, there are 3 panels showing\n (1) requests per second to that service,\n (2) average, median, and p99 latency of requests to that service, and\n (3) p99 latency of requests to each instance of that service.\n

\n

\n It also includes metrics for the key-value (KV) stores used to manage\n the high-availability tracker and the ingesters.\n

\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 12, - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Writes dashboard description", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "100px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_samples:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Samples / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Exemplars / sec\nThe total number of received exemplars by the distributors, excluding rejected and deduped exemplars, 
but not necessarily ingested by the ingesters.\n\n", - "fill": 1, - "format": "short", - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Exemplars / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### In-memory series\nThe number of series not yet flushed to object storage that are held in ingester memory.\n\n", - "fill": 1, - "format": "short", - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - 
"steppedLine": false, - "targets": [ - { - "expr": "sum(cortex_ingester_memory_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n/ on(cluster, namespace) group_left\nmax by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}))\n", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "In-memory series", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Exemplars in ingesters\nNumber of TSDB exemplars currently in ingesters' storage.\n\n", - "fill": 1, - "format": "short", - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cortex_ingester_tsdb_exemplar_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n/ on(cluster, namespace) group_left\nmax by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}))\n", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Exemplars in ingesters", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "count(count by(user) (cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Tenants", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 
null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Requests / sec\nThe rate of successful, failed and rejected requests to distributor.\nRejected requests are requests that distributor fails to handle because of distributor instance limits.\nWhen distributor is configured to use \"early\" request rejection, then rejected requests are NOT included in other metrics.\nWhen distributor is not configured to use \"early\" request rejection, then rejected requests are also counted as \"errors\".\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": 
"byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 
4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": 
"desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Requests / sec\nThe rate of successful, failed and rejected requests to ingester.\nRejected requests are requests that ingester fails to handle because of ingester instance limits (ingester-max-inflight-push-requests, ingester-max-inflight-push-requests-bytes, ingester-max-ingestion-rate).\nWhen ingester is configured to use \"early\" request rejection, then rejected requests are NOT included in other metrics.\nWhen ingester is not configured to use \"early\" request rejection, then rejected requests are also counted as \"errors\".\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } 
- } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",route=\"/cortex.Ingester/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - 
"defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - 
} - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } 
- } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 14, - 
"links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor - key-value store for high-availability (HA) deduplication", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - 
"drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": 
"single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor - key-value store for distributors ring", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - 
"mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 18, - 
"links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester - key-value store for the ingesters ring", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Uploaded blocks / sec\nThe rate of blocks being uploaded from the ingesters\nto object storage.\n\n", - 
"fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_shipper_uploads_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) - sum(rate(cortex_ingester_shipper_upload_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_ingester_shipper_upload_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Uploaded blocks / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Upload latency\nThe average, median (50th percentile), and 99th percentile time\nthe ingesters take to upload blocks to object storage.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - 
"spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Upload latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - 
"repeatRowId": null, - "showTitle": true, - "title": "Ingester - shipper", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Compactions per second\nIngesters maintain a local TSDB per-tenant on disk. Each TSDB maintains a head block for each\nactive time series; these blocks get periodically compacted (by default, every 2h).\nThis panel shows the rate of compaction operations across all TSDBs on all ingesters.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_tsdb_compactions_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_ingester_tsdb_compactions_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Compactions / sec", - "type": "timeseries" - }, - { - 
"datasource": "$datasource", - "description": "### Compaction latency\nThe average, median (50th percentile), and 99th percentile time ingesters take to compact TSDB head blocks\non the local filesystem.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Compactions latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - 
"show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester - TSDB head", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### WAL truncations per second\nThe WAL is truncated each time a new TSDB block is written. This panel measures the rate of\ntruncations.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_tsdb_wal_truncations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) - sum(rate(cortex_ingester_tsdb_wal_truncations_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_ingester_tsdb_wal_truncations_failed_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "WAL truncations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Checkpoints created per second\nCheckpoints are created as part of the WAL truncation process.\nThis metric measures the rate of checkpoint creation.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_tsdb_checkpoint_creations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) - sum(rate(cortex_ingester_tsdb_checkpoint_creations_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_ingester_tsdb_checkpoint_creations_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - 
"legendFormat": "failed", - "legendLink": null - } - ], - "title": "Checkpoints created / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### WAL truncations latency (including checkpointing)\nAverage time taken to perform a full WAL truncation,\nincluding the time taken for the checkpointing to complete.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_tsdb_wal_truncate_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\n/\nsum(rate(cortex_ingester_tsdb_wal_truncate_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) >= 0\n", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - } - ], - "title": "WAL truncations latency (includes checkpointing)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "WAL" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } 
- } - ] - }, - { - "matcher": { - "id": "byName", - "options": "mmap-ed chunks" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E28A42", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 26, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_tsdb_wal_corruptions_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "WAL", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_ingester_tsdb_mmap_chunk_corruptions_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "mmap-ed chunks", - "legendLink": null - } - ], - "title": "Corruptions / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester - TSDB write ahead log (WAL)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Distributor exemplars incoming rate\nThe rate of exemplars that have come in to the distributor, including rejected or deduped exemplars.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ex/s" - }, - "overrides": [ ] - }, - "id": 27, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": 
"sum(cluster_namespace_job:cortex_distributor_exemplars_in:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "incoming exemplars", - "legendLink": null - } - ], - "title": "Distributor exemplars incoming rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor exemplars received rate\nThe rate of received exemplars, excluding rejected and deduped exemplars.\nThis number can be sensibly lower than incoming rate because we dedupe the HA sent exemplars, and then reject based on time, see `cortex_discarded_exemplars_total` for specific reasons rates.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ex/s" - }, - "overrides": [ ] - }, - "id": 28, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "received exemplars", - "legendLink": null - } - ], - "title": "Distributor exemplars received rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Ingester ingested exemplars rate\nThe rate of exemplars ingested in the ingesters.\nEvery exemplar is sent to the replication factor number of ingesters, so the sum of rates from all ingesters is divided by the replication factor.\nThis ingested exemplars rate should match the distributor's received exemplars rate.\n\n", - "fieldConfig": { - "defaults": { - 
"custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ex/s" - }, - "overrides": [ ] - }, - "id": 29, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n cluster_namespace_job:cortex_ingester_ingested_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "ingested exemplars", - "legendLink": null - } - ], - "title": "Ingester ingested exemplars rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Ingester appended exemplars rate\nThe rate of exemplars appended in the ingesters.\nThis can be lower than ingested exemplars rate since TSDB does not append the same exemplar twice, and those can be frequent.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ex/s" - }, - "overrides": [ ] - }, - "id": 30, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n cluster_namespace_job:cortex_ingester_tsdb_exemplar_exemplars_appended:rate5m{cluster=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "appended exemplars", - "legendLink": null - } - ], - "title": "Ingester appended exemplars rate", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Exemplars", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 31, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_distributor_instance_rejected_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{reason}}", - "legendLink": null - } - ], - "title": "Rejected distributor requests", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 32, - "links": [ ], - "options": { - 
"legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_ingester_instance_rejected_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{reason}}", - "legendLink": null - } - ], - "title": "Rejected ingester requests", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Instance Limits", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - 
"10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Writes", - "uid": "8280707b8f16e7b87b840fc1cc92d4c5", - "version": 0 - } -kind: ConfigMap +kind: Service metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir labels: - grafana_dashboard: "1" - name: mimir-writes.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - MIMIR_S3_SECRET_ACCESS_KEY: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= -kind: Secret -metadata: - name: mimir-distributed-env-92ddctt858 + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent-cluster namespace: monitoring-system -type: Opaque +spec: + clusterIP: None + ports: + - name: http + port: 80 + protocol: TCP + targetPort: 80 + - name: grpc-otlp + port: 4317 + protocol: TCP + targetPort: 4317 + - name: http-otlp + port: 4318 + protocol: TCP + targetPort: 4318 + - name: zipkin + port: 9411 + protocol: TCP + targetPort: 9411 + - name: jaeger-compact + port: 6831 + protocol: UDP + targetPort: 6831 + selector: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + type: ClusterIP --- apiVersion: v1 kind: Service @@ -45544,1595 +2918,148 @@ spec: app.kubernetes.io/instance: mimir-distributed app.kubernetes.io/name: mimir --- -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: agent-flow-mixin-alerts - namespace: monitoring-system -spec: - groups: - - name: clustering - rules: - - alert: ClusterNotConverging - annotations: - message: Cluster is not converging. 
- expr: stddev by (cluster, namespace) (sum without (state) (cluster_node_peers)) - != 0 - for: 5m - - alert: ClusterSplitBrain - annotations: - message: Cluster nodes have entered a split brain state. - expr: | - sum without (state) (cluster_node_peers) != - on (cluster, namespace) group_left - count by (cluster, namespace) (cluster_node_info) - for: 5m - - alert: ClusterLamportClockDrift - annotations: - message: Cluster nodes' lamport clocks are not converging. - expr: stddev by (cluster, namespace) (cluster_node_lamport_time) > 4 * sqrt(count - by (cluster, namespace) (cluster_node_info)) - for: 5m - - alert: ClusterNodeUnhealthy - annotations: - message: Cluster node is reporting a health score > 0. - expr: | - cluster_node_gossip_health_score > 0 - for: 5m - - alert: ClusterLamportClockStuck - annotations: - message: Cluster nodes's lamport clocks is not progressing. - expr: | - sum by (cluster, namespace, instance) (rate(cluster_node_lamport_time[2m])) == 0 - and on (cluster, namespace, instance) (cluster_node_peers > 1) - for: 5m - - alert: ClusterNodeNameConflict - annotations: - message: A node tried to join the cluster with a name conflicting with an - existing peer. - expr: sum by (cluster, namespace) (rate(cluster_node_gossip_received_events_total{event="node_conflict"}[2m])) - > 0 - for: 10m - - alert: ClusterNodeStuckTerminating - annotations: - message: Cluster node stuck in Terminating state. - expr: sum by (cluster, namespace, instance) (cluster_node_peers{state="terminating"}) - > 0 - for: 5m - - alert: ClusterConfigurationDrift - annotations: - message: Cluster nodes are not using the same configuration file. - expr: | - count without (sha256) ( - max by (cluster, namespace, sha256) (agent_config_hash and on(cluster, namespace) cluster_node_info) - ) > 1 - for: 5m - - name: agent_controller - rules: - - alert: SlowComponentEvaluations - annotations: - message: Flow component evaluations are taking too long. 
- expr: sum by (cluster, namespace, component_id) (rate(agent_component_evaluation_slow_seconds[10m])) - > 0 - for: 15m - - alert: UnhealthyComponents - annotations: - message: Unhealthy Flow components detected. - expr: sum(agent_component_controller_running_components{health_type!="healthy"}) - > 0 - for: 15m ---- -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule +apiVersion: apps/v1 +kind: DaemonSet metadata: - name: mimir-mixin-alerts + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent namespace: monitoring-system spec: - groups: - - name: mimir_alerts - rules: - - alert: MimirIngesterUnhealthy - annotations: - message: Mimir cluster {{ $labels.cluster }}/{{ $labels.namespace }} has {{ - printf "%f" $value }} unhealthy ingester(s). - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterunhealthy - expr: | - min by (cluster, namespace) (cortex_ring_members{state="Unhealthy", name="ingester"}) > 0 - for: 15m - labels: - severity: critical - - alert: MimirRequestErrors - annotations: - message: | - The route {{ $labels.route }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% errors. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrequesterrors - expr: | - 100 * sum by (cluster, namespace, job, route) (rate(cortex_request_duration_seconds_count{status_code=~"5..",route!~"ready|debug_pprof"}[1m])) - / - sum by (cluster, namespace, job, route) (rate(cortex_request_duration_seconds_count{route!~"ready|debug_pprof"}[1m])) - > 1 - for: 15m - labels: - severity: critical - - alert: MimirRequestLatency - annotations: - message: | - {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrequestlatency - expr: | - cluster_namespace_job_route:cortex_request_duration_seconds:99quantile{route!~"metrics|/frontend.Frontend/Process|ready|/schedulerpb.SchedulerForFrontend/FrontendLoop|/schedulerpb.SchedulerForQuerier/QuerierLoop|debug_pprof"} - > - 2.5 - for: 15m - labels: - severity: warning - - alert: MimirQueriesIncorrect - annotations: - message: | - The Mimir cluster {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% incorrect query results. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirqueriesincorrect - expr: | - 100 * sum by (cluster, namespace) (rate(test_exporter_test_case_result_total{result="fail"}[5m])) - / - sum by (cluster, namespace) (rate(test_exporter_test_case_result_total[5m])) > 1 - for: 15m - labels: - severity: warning - - alert: MimirInconsistentRuntimeConfig - annotations: - message: | - An inconsistent runtime config file is used across cluster {{ $labels.cluster }}/{{ $labels.namespace }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirinconsistentruntimeconfig - expr: | - count(count by(cluster, namespace, job, sha256) (cortex_runtime_config_hash)) without(sha256) > 1 - for: 1h - labels: - severity: critical - - alert: MimirBadRuntimeConfig - annotations: - message: | - {{ $labels.job }} failed to reload runtime config. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirbadruntimeconfig - expr: | - # The metric value is reset to 0 on error while reloading the config at runtime. - cortex_runtime_config_last_reload_successful == 0 - for: 5m - labels: - severity: critical - - alert: MimirFrontendQueriesStuck - annotations: - message: | - There are {{ $value }} queued up queries in {{ $labels.cluster }}/{{ $labels.namespace }} {{ $labels.job }}. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirfrontendqueriesstuck - expr: | - sum by (cluster, namespace, job) (min_over_time(cortex_query_frontend_queue_length[1m])) > 0 - for: 5m - labels: - severity: critical - - alert: MimirSchedulerQueriesStuck - annotations: - message: | - There are {{ $value }} queued up queries in {{ $labels.cluster }}/{{ $labels.namespace }} {{ $labels.job }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirschedulerqueriesstuck - expr: | - sum by (cluster, namespace, job) (min_over_time(cortex_query_scheduler_queue_length[1m])) > 0 - for: 7m - labels: - severity: critical - - alert: MimirCacheRequestErrors - annotations: - message: | - The cache {{ $labels.name }} used by Mimir {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% errors for {{ $labels.operation }} operation. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircacherequesterrors - expr: | - ( - sum by(cluster, namespace, name, operation) ( - rate(thanos_memcached_operation_failures_total[1m]) - or - rate(thanos_cache_operation_failures_total[1m]) - ) - / - sum by(cluster, namespace, name, operation) ( - rate(thanos_memcached_operations_total[1m]) - or - rate(thanos_cache_operations_total[1m]) - ) - ) * 100 > 5 - for: 5m - labels: - severity: warning - - alert: MimirIngesterRestarts - annotations: - message: Mimir {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has restarted {{ printf "%.2f" $value }} times in the last 30 mins. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterrestarts - expr: | - ( - sum by(cluster, namespace, pod) ( - increase(kube_pod_container_status_restarts_total{container=~"(ingester|mimir-write)"}[30m]) - ) - >= 2 - ) - and - ( - count by(cluster, namespace, pod) (cortex_build_info) > 0 - ) - labels: - severity: warning - - alert: MimirKVStoreFailure - annotations: - message: | - Mimir {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is failing to talk to the KV store {{ $labels.kv_name }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirkvstorefailure - expr: | - ( - sum by(cluster, namespace, pod, status_code, kv_name) (rate(cortex_kv_request_duration_seconds_count{status_code!~"2.+"}[1m])) - / - sum by(cluster, namespace, pod, status_code, kv_name) (rate(cortex_kv_request_duration_seconds_count[1m])) - ) - # We want to get alerted only in case there's a constant failure. - == 1 - for: 5m - labels: - severity: critical - - alert: MimirMemoryMapAreasTooHigh - annotations: - message: '{{ $labels.job }}/{{ $labels.pod }} has a number of mmap-ed areas - close to the limit.' - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirmemorymapareastoohigh - expr: | - process_memory_map_areas{job=~".*/(ingester.*|cortex|mimir|mimir-write.*|store-gateway.*|cortex|mimir|mimir-backend.*)"} / process_memory_map_areas_limit{job=~".*/(ingester.*|cortex|mimir|mimir-write.*|store-gateway.*|cortex|mimir|mimir-backend.*)"} > 0.8 - for: 5m - labels: - severity: critical - - alert: MimirIngesterInstanceHasNoTenants - annotations: - message: Mimir ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has no tenants assigned. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterinstancehasnotenants - expr: | - (min by(cluster, namespace, pod) (cortex_ingester_memory_users) == 0) - and on (cluster, namespace) - # Only if there are more time-series than would be expected due to continuous testing load - ( - sum by(cluster, namespace) (cortex_ingester_memory_series) - / - max by(cluster, namespace) (cortex_distributor_replication_factor) - ) > 100000 - for: 1h - labels: - severity: warning - - alert: MimirRulerInstanceHasNoRuleGroups - annotations: - message: Mimir ruler {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has no rule groups assigned. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulerinstancehasnorulegroups - expr: | - # Alert on ruler instances in microservices mode that have no rule groups assigned, - min by(cluster, namespace, pod) (cortex_ruler_managers_total{pod=~"(.*mimir-)?ruler.*"}) == 0 - # but only if other ruler instances of the same cell do have rule groups assigned - and on (cluster, namespace) - (max by(cluster, namespace) (cortex_ruler_managers_total) > 0) - # and there are more than two instances overall - and on (cluster, namespace) - (count by (cluster, namespace) (cortex_ruler_managers_total) > 2) - for: 1h - labels: - severity: warning - - alert: MimirIngestedDataTooFarInTheFuture - annotations: - message: Mimir ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has ingested samples with timestamps more than 1h in the future. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesteddatatoofarinthefuture - expr: | - max by(cluster, namespace, pod) ( - cortex_ingester_tsdb_head_max_timestamp_seconds - time() - and - cortex_ingester_tsdb_head_max_timestamp_seconds > 0 - ) > 60*60 - for: 5m - labels: - severity: warning - - alert: MimirRingMembersMismatch - annotations: - message: | - Number of members in Mimir ingester hash ring does not match the expected number in {{ $labels.cluster }}/{{ $labels.namespace }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirringmembersmismatch - expr: | - ( - avg by(cluster, namespace) (sum by(cluster, namespace, pod) (cortex_ring_members{name="ingester",job=~".*/(ingester.*|cortex|mimir|mimir-write.*)"})) - != sum by(cluster, namespace) (up{job=~".*/(ingester.*|cortex|mimir|mimir-write.*)"}) - ) - and - ( - count by(cluster, namespace) (cortex_build_info) > 0 - ) - for: 15m - labels: - component: ingester - severity: warning - - name: mimir_instance_limits_alerts - rules: - - alert: MimirIngesterReachingSeriesLimit - annotations: - message: | - Ingester {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its series limit. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterreachingserieslimit - expr: | - ( - (cortex_ingester_memory_series / ignoring(limit) cortex_ingester_instance_limits{limit="max_series"}) - and ignoring (limit) - (cortex_ingester_instance_limits{limit="max_series"} > 0) - ) > 0.8 - for: 3h - labels: - severity: warning - - alert: MimirIngesterReachingSeriesLimit - annotations: - message: | - Ingester {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its series limit. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterreachingserieslimit - expr: | - ( - (cortex_ingester_memory_series / ignoring(limit) cortex_ingester_instance_limits{limit="max_series"}) - and ignoring (limit) - (cortex_ingester_instance_limits{limit="max_series"} > 0) - ) > 0.9 - for: 5m - labels: - severity: critical - - alert: MimirIngesterReachingTenantsLimit - annotations: - message: | - Ingester {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its tenant limit. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterreachingtenantslimit - expr: | - ( - (cortex_ingester_memory_users / ignoring(limit) cortex_ingester_instance_limits{limit="max_tenants"}) - and ignoring (limit) - (cortex_ingester_instance_limits{limit="max_tenants"} > 0) - ) > 0.7 - for: 5m - labels: - severity: warning - - alert: MimirIngesterReachingTenantsLimit - annotations: - message: | - Ingester {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its tenant limit. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterreachingtenantslimit - expr: | - ( - (cortex_ingester_memory_users / ignoring(limit) cortex_ingester_instance_limits{limit="max_tenants"}) - and ignoring (limit) - (cortex_ingester_instance_limits{limit="max_tenants"} > 0) - ) > 0.8 - for: 5m - labels: - severity: critical - - alert: MimirReachingTCPConnectionsLimit - annotations: - message: | - Mimir instance {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its TCP connections limit for {{ $labels.protocol }} protocol. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirreachingtcpconnectionslimit - expr: | - cortex_tcp_connections / cortex_tcp_connections_limit > 0.8 and - cortex_tcp_connections_limit > 0 - for: 5m - labels: - severity: critical - - alert: MimirDistributorReachingInflightPushRequestLimit - annotations: - message: | - Distributor {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its inflight push request limit. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirdistributorreachinginflightpushrequestlimit - expr: | - ( - (cortex_distributor_inflight_push_requests / ignoring(limit) cortex_distributor_instance_limits{limit="max_inflight_push_requests"}) - and ignoring (limit) - (cortex_distributor_instance_limits{limit="max_inflight_push_requests"} > 0) - ) > 0.8 - for: 5m - labels: - severity: critical - - name: mimir-rollout-alerts - rules: - - alert: MimirRolloutStuck - annotations: - message: | - The {{ $labels.rollout_group }} rollout is stuck in {{ $labels.cluster }}/{{ $labels.namespace }}. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrolloutstuck - expr: | - ( - max without (revision) ( - sum without(statefulset) (label_replace(kube_statefulset_status_current_revision, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?")) - unless - sum without(statefulset) (label_replace(kube_statefulset_status_update_revision, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?")) - ) - * - ( - sum without(statefulset) (label_replace(kube_statefulset_replicas, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?")) - != - sum without(statefulset) (label_replace(kube_statefulset_status_replicas_updated, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?")) - ) - ) and ( - changes(sum without(statefulset) (label_replace(kube_statefulset_status_replicas_updated, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?"))[15m:1m]) - == - 0 - ) - * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - for: 30m - labels: - severity: warning - workload_type: statefulset - - alert: MimirRolloutStuck - annotations: - message: | - The {{ $labels.rollout_group }} rollout is stuck in {{ $labels.cluster }}/{{ $labels.namespace }}. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrolloutstuck - expr: | - ( - sum without(deployment) (label_replace(kube_deployment_spec_replicas, "rollout_group", "$1", "deployment", "(.*?)(?:-zone-[a-z])?")) - != - sum without(deployment) (label_replace(kube_deployment_status_replicas_updated, "rollout_group", "$1", "deployment", "(.*?)(?:-zone-[a-z])?")) - ) and ( - changes(sum without(deployment) (label_replace(kube_deployment_status_replicas_updated, "rollout_group", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"))[15m:1m]) - == - 0 - ) - * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - for: 30m - labels: - severity: warning - workload_type: deployment - - alert: RolloutOperatorNotReconciling - annotations: - message: | - Rollout operator is not reconciling the rollout group {{ $labels.rollout_group }} in {{ $labels.cluster }}/{{ $labels.namespace }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#rolloutoperatornotreconciling - expr: | - max by(cluster, namespace, rollout_group) (time() - rollout_operator_last_successful_group_reconcile_timestamp_seconds) > 600 - for: 5m - labels: - severity: critical - - name: mimir-provisioning - rules: - - alert: MimirAllocatingTooMuchMemory - annotations: - message: | - Instance {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is using too much memory. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirallocatingtoomuchmemory - expr: | - ( - # We use RSS instead of working set memory because of the ingester's extensive usage of mmap. - # See: https://github.com/grafana/mimir/issues/2466 - container_memory_rss{container=~"(ingester|mimir-write|mimir-backend)"} - / - ( container_spec_memory_limit_bytes{container=~"(ingester|mimir-write|mimir-backend)"} > 0 ) - ) - # Match only Mimir namespaces. 
- * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - > 0.65 - for: 15m - labels: - severity: warning - - alert: MimirAllocatingTooMuchMemory - annotations: - message: | - Instance {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is using too much memory. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirallocatingtoomuchmemory - expr: | - ( - # We use RSS instead of working set memory because of the ingester's extensive usage of mmap. - # See: https://github.com/grafana/mimir/issues/2466 - container_memory_rss{container=~"(ingester|mimir-write|mimir-backend)"} - / - ( container_spec_memory_limit_bytes{container=~"(ingester|mimir-write|mimir-backend)"} > 0 ) - ) - # Match only Mimir namespaces. - * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - > 0.8 - for: 15m - labels: - severity: critical - - name: ruler_alerts - rules: - - alert: MimirRulerTooManyFailedPushes - annotations: - message: | - Mimir Ruler {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% write (push) errors. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulertoomanyfailedpushes - expr: | - 100 * ( - sum by (cluster, namespace, pod) (rate(cortex_ruler_write_requests_failed_total[1m])) - / - sum by (cluster, namespace, pod) (rate(cortex_ruler_write_requests_total[1m])) - ) > 1 - for: 5m - labels: - severity: critical - - alert: MimirRulerTooManyFailedQueries - annotations: - message: | - Mimir Ruler {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% errors while evaluating rules. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulertoomanyfailedqueries - expr: | - 100 * ( - sum by (cluster, namespace, pod) (rate(cortex_ruler_queries_failed_total[1m])) - / - sum by (cluster, namespace, pod) (rate(cortex_ruler_queries_total[1m])) - ) > 1 - for: 5m - labels: - severity: critical - - alert: MimirRulerMissedEvaluations - annotations: - message: | - Mimir Ruler {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% missed iterations for the rule group {{ $labels.rule_group }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulermissedevaluations - expr: | - 100 * ( - sum by (cluster, namespace, pod, rule_group) (rate(cortex_prometheus_rule_group_iterations_missed_total[1m])) - / - sum by (cluster, namespace, pod, rule_group) (rate(cortex_prometheus_rule_group_iterations_total[1m])) - ) > 1 - for: 5m - labels: - severity: warning - - alert: MimirRulerFailedRingCheck - annotations: - message: | - Mimir Rulers in {{ $labels.cluster }}/{{ $labels.namespace }} are experiencing errors when checking the ring for rule group ownership. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulerfailedringcheck - expr: | - sum by (cluster, namespace, job) (rate(cortex_ruler_ring_check_errors_total[1m])) - > 0 - for: 5m - labels: - severity: critical - - alert: MimirRulerRemoteEvaluationFailing - annotations: - message: | - Mimir rulers in {{ $labels.cluster }}/{{ $labels.namespace }} are failing to perform {{ printf "%.2f" $value }}% of remote evaluations through the ruler-query-frontend. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulerremoteevaluationfailing - expr: | - 100 * ( - sum by (cluster, namespace) (rate(cortex_request_duration_seconds_count{route="/httpgrpc.HTTP/Handle", status_code=~"5..", job=~".*/(ruler-query-frontend.*)"}[5m])) - / - sum by (cluster, namespace) (rate(cortex_request_duration_seconds_count{route="/httpgrpc.HTTP/Handle", job=~".*/(ruler-query-frontend.*)"}[5m])) - ) > 1 - for: 5m - labels: - severity: warning - - name: gossip_alerts - rules: - - alert: MimirGossipMembersTooHigh - annotations: - message: One or more Mimir instances in {{ $labels.cluster }}/{{ $labels.namespace - }} consistently sees a higher than expected number of gossip members. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirgossipmemberstoohigh - expr: | - max by (cluster, namespace) (memberlist_client_cluster_members_count) - > - (sum by (cluster, namespace) (up{job=~".+/(admin-api|alertmanager|compactor.*|distributor|ingester.*|querier.*|ruler|ruler-querier.*|store-gateway.*|cortex|mimir|mimir-write.*|mimir-read.*|mimir-backend.*)"}) + 10) - for: 20m - labels: - severity: warning - - alert: MimirGossipMembersTooLow - annotations: - message: One or more Mimir instances in {{ $labels.cluster }}/{{ $labels.namespace - }} consistently sees a lower than expected number of gossip members. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirgossipmemberstoolow - expr: | - min by (cluster, namespace) (memberlist_client_cluster_members_count) - < - (sum by (cluster, namespace) (up{job=~".+/(admin-api|alertmanager|compactor.*|distributor|ingester.*|querier.*|ruler|ruler-querier.*|store-gateway.*|cortex|mimir|mimir-write.*|mimir-read.*|mimir-backend.*)"}) * 0.5) - for: 20m - labels: - severity: warning - - name: etcd_alerts - rules: - - alert: EtcdAllocatingTooMuchMemory - annotations: - message: | - Too much memory being used by {{ $labels.namespace }}/{{ $labels.pod }} - bump memory limit. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#etcdallocatingtoomuchmemory - expr: | - ( - container_memory_working_set_bytes{container="etcd"} - / - ( container_spec_memory_limit_bytes{container="etcd"} > 0 ) - ) > 0.65 - for: 15m - labels: - severity: warning - - alert: EtcdAllocatingTooMuchMemory - annotations: - message: | - Too much memory being used by {{ $labels.namespace }}/{{ $labels.pod }} - bump memory limit. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#etcdallocatingtoomuchmemory - expr: | - ( - container_memory_working_set_bytes{container="etcd"} - / - ( container_spec_memory_limit_bytes{container="etcd"} > 0 ) - ) > 0.8 - for: 15m - labels: - severity: critical - - name: alertmanager_alerts - rules: - - alert: MimirAlertmanagerSyncConfigsFailing - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is failing to read tenant configurations from storage. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagersyncconfigsfailing - expr: | - rate(cortex_alertmanager_sync_configs_failed_total[5m]) > 0 - for: 30m - labels: - severity: critical - - alert: MimirAlertmanagerRingCheckFailing - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is unable to check tenants ownership via the ring. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerringcheckfailing - expr: | - rate(cortex_alertmanager_ring_check_errors_total[2m]) > 0 - for: 10m - labels: - severity: critical - - alert: MimirAlertmanagerPartialStateMergeFailing - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is failing to merge partial state changes received from a replica. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerpartialstatemergefailing - expr: | - rate(cortex_alertmanager_partial_state_merges_failed_total[2m]) > 0 - for: 10m - labels: - severity: critical - - alert: MimirAlertmanagerReplicationFailing - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is failing to replicating partial state to its replicas. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerreplicationfailing - expr: | - rate(cortex_alertmanager_state_replication_failed_total[2m]) > 0 - for: 10m - labels: - severity: critical - - alert: MimirAlertmanagerPersistStateFailing - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is unable to persist full state snaphots to remote storage. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerpersiststatefailing - expr: | - rate(cortex_alertmanager_state_persist_failed_total[15m]) > 0 - for: 1h - labels: - severity: critical - - alert: MimirAlertmanagerInitialSyncFailed - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} was unable to obtain some initial state when starting up. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerinitialsyncfailed - expr: | - increase(cortex_alertmanager_state_initial_sync_completed_total{outcome="failed"}[1m]) > 0 - labels: - severity: critical - - alert: MimirAlertmanagerAllocatingTooMuchMemory - annotations: - message: | - Alertmanager {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is using too much memory. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerallocatingtoomuchmemory - expr: | - (container_memory_working_set_bytes{container="alertmanager"} / container_spec_memory_limit_bytes{container="alertmanager"}) > 0.80 - and - (container_spec_memory_limit_bytes{container="alertmanager"} > 0) - for: 15m - labels: - severity: warning - - alert: MimirAlertmanagerAllocatingTooMuchMemory - annotations: - message: | - Alertmanager {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is using too much memory. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerallocatingtoomuchmemory - expr: | - (container_memory_working_set_bytes{container="alertmanager"} / container_spec_memory_limit_bytes{container="alertmanager"}) > 0.90 - and - (container_spec_memory_limit_bytes{container="alertmanager"} > 0) - for: 15m - labels: - severity: critical - - alert: MimirAlertmanagerInstanceHasNoTenants - annotations: - message: Mimir alertmanager {{ $labels.pod }} in {{ $labels.cluster }}/{{ - $labels.namespace }} owns no tenants. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerinstancehasnotenants - expr: | - # Alert on alertmanager instances in microservices mode that own no tenants, - min by(cluster, namespace, pod) (cortex_alertmanager_tenants_owned{pod=~"(.*mimir-)?alertmanager.*"}) == 0 - # but only if other instances of the same cell do have tenants assigned. - and on (cluster, namespace) - max by(cluster, namespace) (cortex_alertmanager_tenants_owned) > 0 - for: 1h - labels: - severity: warning - - name: mimir_blocks_alerts - rules: - - alert: MimirIngesterHasNotShippedBlocks - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not shipped any block in the last 4 hours. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterhasnotshippedblocks - expr: | - (min by(cluster, namespace, pod) (time() - cortex_ingester_shipper_last_successful_upload_timestamp_seconds) > 60 * 60 * 4) - and - (max by(cluster, namespace, pod) (cortex_ingester_shipper_last_successful_upload_timestamp_seconds) > 0) - and - # Only if the ingester has ingested samples over the last 4h. - (max by(cluster, namespace, pod) (max_over_time(cluster_namespace_pod:cortex_ingester_ingested_samples_total:rate1m[4h])) > 0) - and - # Only if the ingester was ingesting samples 4h ago. 
This protects against the case where the ingester replica - # had ingested samples in the past, then no traffic was received for a long period and then it starts - # receiving samples again. Without this check, the alert would fire as soon as it gets back receiving - # samples, while the a block shipping is expected within the next 4h. - (max by(cluster, namespace, pod) (max_over_time(cluster_namespace_pod:cortex_ingester_ingested_samples_total:rate1m[1h] offset 4h)) > 0) - for: 15m - labels: - severity: critical - - alert: MimirIngesterHasNotShippedBlocksSinceStart - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not shipped any block in the last 4 hours. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterhasnotshippedblockssincestart - expr: | - (max by(cluster, namespace, pod) (cortex_ingester_shipper_last_successful_upload_timestamp_seconds) == 0) - and - (max by(cluster, namespace, pod) (max_over_time(cluster_namespace_pod:cortex_ingester_ingested_samples_total:rate1m[4h])) > 0) - for: 4h - labels: - severity: critical - - alert: MimirIngesterHasUnshippedBlocks - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has compacted a block {{ $value | humanizeDuration }} ago but it hasn't - been successfully uploaded to the storage yet. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterhasunshippedblocks - expr: | - (time() - cortex_ingester_oldest_unshipped_block_timestamp_seconds > 3600) - and - (cortex_ingester_oldest_unshipped_block_timestamp_seconds > 0) - for: 15m - labels: - severity: critical - - alert: MimirIngesterTSDBHeadCompactionFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to compact TSDB head. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbheadcompactionfailed - expr: | - rate(cortex_ingester_tsdb_compactions_failed_total[5m]) > 0 - for: 15m - labels: - severity: critical - - alert: MimirIngesterTSDBHeadTruncationFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to truncate TSDB head. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbheadtruncationfailed - expr: | - rate(cortex_ingester_tsdb_head_truncations_failed_total[5m]) > 0 - labels: - severity: critical - - alert: MimirIngesterTSDBCheckpointCreationFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to create TSDB checkpoint. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbcheckpointcreationfailed - expr: | - rate(cortex_ingester_tsdb_checkpoint_creations_failed_total[5m]) > 0 - labels: - severity: critical - - alert: MimirIngesterTSDBCheckpointDeletionFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to delete TSDB checkpoint. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbcheckpointdeletionfailed - expr: | - rate(cortex_ingester_tsdb_checkpoint_deletions_failed_total[5m]) > 0 - labels: - severity: critical - - alert: MimirIngesterTSDBWALTruncationFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to truncate TSDB WAL. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbwaltruncationfailed - expr: | - rate(cortex_ingester_tsdb_wal_truncations_failed_total[5m]) > 0 - labels: - severity: warning - - alert: MimirIngesterTSDBWALCorrupted - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} got a corrupted TSDB WAL. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbwalcorrupted - expr: | - # alert when there are more than one corruptions - count by (cluster, namespace) (rate(cortex_ingester_tsdb_wal_corruptions_total[5m]) > 0) > 1 - and - # and there is only one zone - count by (cluster, namespace) (group by (cluster, namespace, job) (cortex_ingester_tsdb_wal_corruptions_total)) == 1 - labels: - deployment: single-zone - severity: critical - - alert: MimirIngesterTSDBWALCorrupted - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} got a corrupted TSDB WAL. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbwalcorrupted - expr: | - # alert when there are more than one corruptions - count by (cluster, namespace) (sum by (cluster, namespace, job) (rate(cortex_ingester_tsdb_wal_corruptions_total[5m]) > 0)) > 1 - and - # and there are multiple zones - count by (cluster, namespace) (group by (cluster, namespace, job) (cortex_ingester_tsdb_wal_corruptions_total)) > 1 - labels: - deployment: multi-zone - severity: critical - - alert: MimirIngesterTSDBWALWritesFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to write to TSDB WAL. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbwalwritesfailed - expr: | - rate(cortex_ingester_tsdb_wal_writes_failed_total[1m]) > 0 - for: 3m - labels: - severity: critical - - alert: MimirStoreGatewayHasNotSyncTheBucket - annotations: - message: Mimir store-gateway {{ $labels.pod }} in {{ $labels.cluster }}/{{ - $labels.namespace }} has not successfully synched the bucket since {{ $value - | humanizeDuration }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirstoregatewayhasnotsyncthebucket - expr: | - (time() - cortex_bucket_stores_blocks_last_successful_sync_timestamp_seconds{component="store-gateway"} > 60 * 30) - and - cortex_bucket_stores_blocks_last_successful_sync_timestamp_seconds{component="store-gateway"} > 0 - for: 5m - labels: - severity: critical - - alert: MimirStoreGatewayNoSyncedTenants - annotations: - message: Mimir store-gateway {{ $labels.pod }} in {{ $labels.cluster }}/{{ - $labels.namespace }} is not syncing any blocks for any tenant. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirstoregatewaynosyncedtenants - expr: | - min by(cluster, namespace, pod) (cortex_bucket_stores_tenants_synced{component="store-gateway"}) == 0 - for: 1h - labels: - severity: warning - - alert: MimirBucketIndexNotUpdated - annotations: - message: Mimir bucket index for tenant {{ $labels.user }} in {{ $labels.cluster - }}/{{ $labels.namespace }} has not been updated since {{ $value | humanizeDuration - }}. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirbucketindexnotupdated - expr: | - min by(cluster, namespace, user) (time() - cortex_bucket_index_last_successful_update_timestamp_seconds) > 7200 - labels: - severity: critical - - name: mimir_compactor_alerts - rules: - - alert: MimirCompactorHasNotSuccessfullyCleanedUpBlocks - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not successfully cleaned up blocks in the last 6 hours. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotsuccessfullycleanedupblocks - expr: | - # The "last successful run" metric is updated even if the compactor owns no tenants, - # so this alert correctly doesn't fire if compactor has nothing to do. - (time() - cortex_compactor_block_cleanup_last_successful_run_timestamp_seconds > 60 * 60 * 6) - for: 1h - labels: - severity: critical - - alert: MimirCompactorHasNotSuccessfullyRunCompaction - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not run compaction in the last 24 hours. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotsuccessfullyruncompaction - expr: | - # The "last successful run" metric is updated even if the compactor owns no tenants, - # so this alert correctly doesn't fire if compactor has nothing to do. - (time() - cortex_compactor_last_successful_run_timestamp_seconds > 60 * 60 * 24) - and - (cortex_compactor_last_successful_run_timestamp_seconds > 0) - for: 1h - labels: - reason: in-last-24h - severity: critical - - alert: MimirCompactorHasNotSuccessfullyRunCompaction - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not run compaction in the last 24 hours. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotsuccessfullyruncompaction - expr: | - # The "last successful run" metric is updated even if the compactor owns no tenants, - # so this alert correctly doesn't fire if compactor has nothing to do. - cortex_compactor_last_successful_run_timestamp_seconds == 0 - for: 24h - labels: - reason: since-startup - severity: critical - - alert: MimirCompactorHasNotSuccessfullyRunCompaction - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} failed to run 2 consecutive compactions. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotsuccessfullyruncompaction - expr: | - increase(cortex_compactor_runs_failed_total{reason!="shutdown"}[2h]) >= 2 - labels: - reason: consecutive-failures - severity: critical - - alert: MimirCompactorHasNotUploadedBlocks - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not uploaded any block in the last 24 hours. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotuploadedblocks - expr: | - (time() - (max by(cluster, namespace, pod) (thanos_objstore_bucket_last_successful_upload_time{component="compactor"})) > 60 * 60 * 24) - and - (max by(cluster, namespace, pod) (thanos_objstore_bucket_last_successful_upload_time{component="compactor"}) > 0) - and - # Only if some compactions have started. We don't want to fire this alert if the compactor has nothing to do - # (e.g. there are more replicas than required because running as part of mimir-backend). 
- (sum by(cluster, namespace, pod) (rate(cortex_compactor_group_compaction_runs_started_total[24h])) > 0) - for: 15m - labels: - severity: critical - time_period: 24h - - alert: MimirCompactorHasNotUploadedBlocks - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not uploaded any block since its start. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotuploadedblocks - expr: | - (max by(cluster, namespace, pod) (thanos_objstore_bucket_last_successful_upload_time{component="compactor"}) == 0) - and - # Only if some compactions have started. We don't want to fire this alert if the compactor has nothing to do - # (e.g. there are more replicas than required because running as part of mimir-backend). - (sum by(cluster, namespace, pod) (rate(cortex_compactor_group_compaction_runs_started_total[24h])) > 0) - for: 24h - labels: - severity: critical - time_period: since-start - - alert: MimirCompactorSkippedUnhealthyBlocks - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has found and ignored unhealthy blocks. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorskippedunhealthyblocks - expr: | - increase(cortex_compactor_blocks_marked_for_no_compaction_total[5m]) > 0 - for: 1m - labels: - severity: warning - - alert: MimirCompactorSkippedUnhealthyBlocks - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has found and ignored unhealthy blocks. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorskippedunhealthyblocks - expr: | - increase(cortex_compactor_blocks_marked_for_no_compaction_total[5m]) > 1 - for: 30m - labels: - severity: critical - - name: mimir_autoscaling - rules: - - alert: MimirAutoscalerNotActive - annotations: - message: The Horizontal Pod Autoscaler (HPA) {{ $labels.horizontalpodautoscaler - }} in {{ $labels.namespace }} is not active. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirautoscalernotactive - expr: | - ( - label_replace(( - kube_horizontalpodautoscaler_status_condition{condition="ScalingActive",status="false"} - # Match only Mimir namespaces. - * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - # Add "metric" label. - + on(cluster, namespace, horizontalpodautoscaler) group_right label_replace(kube_horizontalpodautoscaler_spec_target_metric*0, "metric", "$1", "metric_name", "(.+)") - > 0), - "scaledObject", "$1", "horizontalpodautoscaler", "keda-hpa-(.*)" - ) - ) - # Alert only if the scaling metric exists and is > 0. If the KEDA ScaledObject is configured to scale down 0, - # then HPA ScalingActive may be false when expected to run 0 replicas. In this case, the scaling metric exported - # by KEDA could not exist at all or being exposed with a value of 0. - and on (cluster, namespace, metric, scaledObject) - (label_replace(keda_scaler_metrics_value, "namespace", "$0", "exported_namespace", ".+") > 0) - for: 1h - labels: - severity: critical - - alert: MimirAutoscalerKedaFailing - annotations: - message: The Keda ScaledObject {{ $labels.scaledObject }} in {{ $labels.namespace - }} is experiencing errors. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirautoscalerkedafailing - expr: | - ( - # Find KEDA scalers reporting errors. 
- label_replace(rate(keda_scaler_errors[5m]), "namespace", "$1", "exported_namespace", "(.*)") - # Match only Mimir namespaces. - * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - ) - > 0 - for: 1h - labels: - severity: critical - - name: mimir_continuous_test - rules: - - alert: MimirContinuousTestNotRunningOnWrites - annotations: - message: Mimir continuous test {{ $labels.test }} in {{ $labels.cluster }}/{{ - $labels.namespace }} is not effectively running because writes are failing. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircontinuoustestnotrunningonwrites - expr: | - sum by(cluster, namespace, test) (rate(mimir_continuous_test_writes_failed_total[5m])) > 0 - for: 1h - labels: - severity: warning - - alert: MimirContinuousTestNotRunningOnReads - annotations: - message: Mimir continuous test {{ $labels.test }} in {{ $labels.cluster }}/{{ - $labels.namespace }} is not effectively running because queries are failing. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircontinuoustestnotrunningonreads - expr: | - sum by(cluster, namespace, test) (rate(mimir_continuous_test_queries_failed_total[5m])) > 0 - for: 1h - labels: - severity: warning - - alert: MimirContinuousTestFailed + minReadySeconds: 10 + selector: + matchLabels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + template: + metadata: annotations: - message: Mimir continuous test {{ $labels.test }} in {{ $labels.cluster }}/{{ - $labels.namespace }} failed when asserting query results. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircontinuoustestfailed - expr: | - sum by(cluster, namespace, test) (rate(mimir_continuous_test_query_result_checks_failed_total[10m])) > 0 - labels: - severity: warning + kubectl.kubernetes.io/default-container: grafana-agent + logs.agent.grafana.com/scrape: "true" + logs.agent.grafana.com/scrub-level: debug + profiles.grafana.com/cpu.port_name: http-metrics + profiles.grafana.com/cpu.scrape: "false" + profiles.grafana.com/goroutine.port_name: http-metrics + profiles.grafana.com/goroutine.scrape: "false" + profiles.grafana.com/memory.port_name: http-metrics + profiles.grafana.com/memory.scrape: "false" + pyroscope.io/service_name: grafana-agent + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + spec: + containers: + - args: + - run + - /etc/agent/config.river + - --storage.path=/tmp/agent + - --server.http.listen-addr=0.0.0.0:80 + - --server.http.ui-path-prefix=/ + - --disable-reporting + - --cluster.enabled=true + - --cluster.join-addresses=grafana-agent-cluster + env: + - name: AGENT_MODE + value: flow + - name: AGENT_DEPLOY_MODE + value: helm + - name: HOSTNAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + envFrom: + - secretRef: + name: agent-env + optional: true + image: docker.io/grafana/agent:v0.40.3 + imagePullPolicy: IfNotPresent + name: grafana-agent + ports: + - containerPort: 80 + name: http-metrics + - containerPort: 4317 + name: grpc-otlp + protocol: TCP + - containerPort: 4318 + name: http-otlp + protocol: TCP + - containerPort: 9411 + name: zipkin + protocol: TCP + - containerPort: 6831 + name: jaeger-compact + protocol: UDP + readinessProbe: + httpGet: + path: /-/ready + port: 80 + scheme: HTTP + initialDelaySeconds: 10 + timeoutSeconds: 1 + volumeMounts: + - mountPath: /etc/agent + name: config + - mountPath: /var/log + name: varlog + readOnly: true + - mountPath: /etc/agent-modules + name: agent-modules + - 
args: + - --volume-dir=/etc/agent + - --webhook-url=http://localhost:80/-/reload + image: ghcr.io/jimmidyson/configmap-reload:v0.12.0 + name: config-reloader + resources: + requests: + cpu: 1m + memory: 5Mi + volumeMounts: + - mountPath: /etc/agent + name: config + dnsPolicy: ClusterFirst + nodeSelector: + kubernetes.io/os: linux + serviceAccountName: grafana-agent + volumes: + - configMap: + name: agent-config-9cc7gk9k2b + name: config + - hostPath: + path: /var/log + name: varlog + - configMap: + name: agent-modules-cf8t5bf7t9 + name: agent-modules + updateStrategy: + rollingUpdate: + maxSurge: 0 + maxUnavailable: 2 + type: RollingUpdate --- apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule +kind: ServiceMonitor metadata: - name: mimir-mixin-rules + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent namespace: monitoring-system spec: - groups: - - name: mimir_api_1 - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_request_duration_seconds:50quantile - - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job) / - sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job) - record: cluster_job:cortex_request_duration_seconds:avg - - expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, - job) - record: cluster_job:cortex_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job) - record: cluster_job:cortex_request_duration_seconds_sum:sum_rate - - expr: 
sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job) - record: cluster_job:cortex_request_duration_seconds_count:sum_rate - - expr: sum(rate(cortex_request_duration_seconds[1m])) by (cluster, job) - record: cluster_job:cortex_request_duration_seconds:sum_rate - - name: mimir_api_2 - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, job, route)) - record: cluster_job_route:cortex_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, job, route)) - record: cluster_job_route:cortex_request_duration_seconds:50quantile - - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job, route) - / sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job, route) - record: cluster_job_route:cortex_request_duration_seconds:avg - - expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, - job, route) - record: cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job, route) - record: cluster_job_route:cortex_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job, - route) - record: cluster_job_route:cortex_request_duration_seconds_count:sum_rate - - expr: sum(rate(cortex_request_duration_seconds[1m])) by (cluster, job, route) - record: cluster_job_route:cortex_request_duration_seconds:sum_rate - - name: mimir_api_3 - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, namespace, job, route)) - record: cluster_namespace_job_route:cortex_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, namespace, job, route)) - record: 
cluster_namespace_job_route:cortex_request_duration_seconds:50quantile - - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, namespace, - job, route) / sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, - namespace, job, route) - record: cluster_namespace_job_route:cortex_request_duration_seconds:avg - - expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, - namespace, job, route) - record: cluster_namespace_job_route:cortex_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, namespace, - job, route) - record: cluster_namespace_job_route:cortex_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, namespace, - job, route) - record: cluster_namespace_job_route:cortex_request_duration_seconds_count:sum_rate - - expr: sum(rate(cortex_request_duration_seconds[1m])) by (cluster, namespace, - job, route) - record: cluster_namespace_job_route:cortex_request_duration_seconds:sum_rate - - name: mimir_querier_api - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_querier_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_querier_request_duration_seconds:50quantile - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - job) / sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, - job) - record: cluster_job:cortex_querier_request_duration_seconds:avg - - expr: sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, - cluster, job) - record: cluster_job:cortex_querier_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - job) 
- record: cluster_job:cortex_querier_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, - job) - record: cluster_job:cortex_querier_request_duration_seconds_count:sum_rate - - expr: histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, job, route)) - record: cluster_job_route:cortex_querier_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, job, route)) - record: cluster_job_route:cortex_querier_request_duration_seconds:50quantile - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - job, route) / sum(rate(cortex_querier_request_duration_seconds_count[1m])) - by (cluster, job, route) - record: cluster_job_route:cortex_querier_request_duration_seconds:avg - - expr: sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, - cluster, job, route) - record: cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - job, route) - record: cluster_job_route:cortex_querier_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, - job, route) - record: cluster_job_route:cortex_querier_request_duration_seconds_count:sum_rate - - expr: histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, namespace, job, route)) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, namespace, job, route)) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds:50quantile - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by 
(cluster, - namespace, job, route) / sum(rate(cortex_querier_request_duration_seconds_count[1m])) - by (cluster, namespace, job, route) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds:avg - - expr: sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, - cluster, namespace, job, route) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - namespace, job, route) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, - namespace, job, route) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds_count:sum_rate - - name: mimir_storage - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_kv_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_kv_request_duration_seconds:50quantile - - expr: sum(rate(cortex_kv_request_duration_seconds_sum[1m])) by (cluster, job) - / sum(rate(cortex_kv_request_duration_seconds_count[1m])) by (cluster, job) - record: cluster_job:cortex_kv_request_duration_seconds:avg - - expr: sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) by (le, cluster, - job) - record: cluster_job:cortex_kv_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_kv_request_duration_seconds_sum[1m])) by (cluster, job) - record: cluster_job:cortex_kv_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_kv_request_duration_seconds_count[1m])) by (cluster, job) - record: cluster_job:cortex_kv_request_duration_seconds_count:sum_rate - - name: mimir_queries - rules: - - expr: histogram_quantile(0.99, 
sum(rate(cortex_query_frontend_retries_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_query_frontend_retries:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_query_frontend_retries_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_query_frontend_retries:50quantile - - expr: sum(rate(cortex_query_frontend_retries_sum[1m])) by (cluster, job) / sum(rate(cortex_query_frontend_retries_count[1m])) - by (cluster, job) - record: cluster_job:cortex_query_frontend_retries:avg - - expr: sum(rate(cortex_query_frontend_retries_bucket[1m])) by (le, cluster, job) - record: cluster_job:cortex_query_frontend_retries_bucket:sum_rate - - expr: sum(rate(cortex_query_frontend_retries_sum[1m])) by (cluster, job) - record: cluster_job:cortex_query_frontend_retries_sum:sum_rate - - expr: sum(rate(cortex_query_frontend_retries_count[1m])) by (cluster, job) - record: cluster_job:cortex_query_frontend_retries_count:sum_rate - - expr: histogram_quantile(0.99, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_query_frontend_queue_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_query_frontend_queue_duration_seconds:50quantile - - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_sum[1m])) by (cluster, - job) / sum(rate(cortex_query_frontend_queue_duration_seconds_count[1m])) by - (cluster, job) - record: cluster_job:cortex_query_frontend_queue_duration_seconds:avg - - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) by - (le, cluster, job) - record: cluster_job:cortex_query_frontend_queue_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_sum[1m])) by (cluster, - job) - record: cluster_job:cortex_query_frontend_queue_duration_seconds_sum:sum_rate - - expr: 
sum(rate(cortex_query_frontend_queue_duration_seconds_count[1m])) by (cluster, - job) - record: cluster_job:cortex_query_frontend_queue_duration_seconds_count:sum_rate - - name: mimir_ingester_queries - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_series_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_series:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_series_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_series:50quantile - - expr: sum(rate(cortex_ingester_queried_series_sum[1m])) by (cluster, job) / - sum(rate(cortex_ingester_queried_series_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_series:avg - - expr: sum(rate(cortex_ingester_queried_series_bucket[1m])) by (le, cluster, - job) - record: cluster_job:cortex_ingester_queried_series_bucket:sum_rate - - expr: sum(rate(cortex_ingester_queried_series_sum[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_series_sum:sum_rate - - expr: sum(rate(cortex_ingester_queried_series_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_series_count:sum_rate - - expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_samples_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_samples:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_samples_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_samples:50quantile - - expr: sum(rate(cortex_ingester_queried_samples_sum[1m])) by (cluster, job) / - sum(rate(cortex_ingester_queried_samples_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_samples:avg - - expr: sum(rate(cortex_ingester_queried_samples_bucket[1m])) by (le, cluster, - job) - record: cluster_job:cortex_ingester_queried_samples_bucket:sum_rate - - expr: 
sum(rate(cortex_ingester_queried_samples_sum[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_samples_sum:sum_rate - - expr: sum(rate(cortex_ingester_queried_samples_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_samples_count:sum_rate - - expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_exemplars_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_exemplars:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_exemplars_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_exemplars:50quantile - - expr: sum(rate(cortex_ingester_queried_exemplars_sum[1m])) by (cluster, job) - / sum(rate(cortex_ingester_queried_exemplars_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_exemplars:avg - - expr: sum(rate(cortex_ingester_queried_exemplars_bucket[1m])) by (le, cluster, - job) - record: cluster_job:cortex_ingester_queried_exemplars_bucket:sum_rate - - expr: sum(rate(cortex_ingester_queried_exemplars_sum[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_exemplars_sum:sum_rate - - expr: sum(rate(cortex_ingester_queried_exemplars_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_exemplars_count:sum_rate - - name: mimir_received_samples - rules: - - expr: | - sum by (cluster, namespace, job) (rate(cortex_distributor_received_samples_total[5m])) - record: cluster_namespace_job:cortex_distributor_received_samples:rate5m - - name: mimir_exemplars_in - rules: - - expr: | - sum by (cluster, namespace, job) (rate(cortex_distributor_exemplars_in_total[5m])) - record: cluster_namespace_job:cortex_distributor_exemplars_in:rate5m - - name: mimir_received_exemplars - rules: - - expr: | - sum by (cluster, namespace, job) (rate(cortex_distributor_received_exemplars_total[5m])) - record: cluster_namespace_job:cortex_distributor_received_exemplars:rate5m - - name: 
mimir_exemplars_ingested - rules: - - expr: | - sum by (cluster, namespace, job) (rate(cortex_ingester_ingested_exemplars_total[5m])) - record: cluster_namespace_job:cortex_ingester_ingested_exemplars:rate5m - - name: mimir_exemplars_appended - rules: - - expr: | - sum by (cluster, namespace, job) (rate(cortex_ingester_tsdb_exemplar_exemplars_appended_total[5m])) - record: cluster_namespace_job:cortex_ingester_tsdb_exemplar_exemplars_appended:rate5m - - name: mimir_scaling_rules - rules: - - expr: | - # Convenience rule to get the number of replicas for both a deployment and a statefulset. - # Multi-zone deployments are grouped together removing the "zone-X" suffix. - sum by (cluster, namespace, deployment) ( - label_replace( - kube_deployment_spec_replicas, - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - or - sum by (cluster, namespace, deployment) ( - label_replace(kube_statefulset_replicas, "deployment", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?") - ) - record: cluster_namespace_deployment:actual_replicas:count - - expr: | - ceil( - quantile_over_time(0.99, - sum by (cluster, namespace) ( - cluster_namespace_job:cortex_distributor_received_samples:rate5m - )[24h:] - ) - / 240000 - ) - labels: - deployment: distributor - reason: sample_rate - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - sum by (cluster, namespace) (cortex_limits_overrides{limit_name="ingestion_rate"}) - * 0.59999999999999998 / 240000 - ) - labels: - deployment: distributor - reason: sample_rate_limits - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - quantile_over_time(0.99, - sum by (cluster, namespace) ( - cluster_namespace_job:cortex_distributor_received_samples:rate5m - )[24h:] - ) - * 3 / 80000 - ) - labels: - deployment: ingester - 
reason: sample_rate - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - quantile_over_time(0.99, - sum by(cluster, namespace) ( - cortex_ingester_memory_series - )[24h:] - ) - / 1500000 - ) - labels: - deployment: ingester - reason: active_series - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - sum by (cluster, namespace) (cortex_limits_overrides{limit_name="max_global_series_per_user"}) - * 3 * 0.59999999999999998 / 1500000 - ) - labels: - deployment: ingester - reason: active_series_limits - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - sum by (cluster, namespace) (cortex_limits_overrides{limit_name="ingestion_rate"}) - * 0.59999999999999998 / 80000 - ) - labels: - deployment: ingester - reason: sample_rate_limits - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - (sum by (cluster, namespace) ( - cortex_ingester_tsdb_storage_blocks_bytes{job=~".+/ingester.*"} - ) / 4) - / - avg by (cluster, namespace) ( - memcached_limit_bytes{job=~".+/memcached"} - ) - ) - labels: - deployment: memcached - reason: active_series - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - sum by (cluster, namespace, pod)(rate(container_cpu_usage_seconds_total[1m])), - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - record: cluster_namespace_deployment:container_cpu_usage_seconds_total:sum_rate - - expr: | - # Convenience rule to get the CPU request for both a deployment and a statefulset. - # Multi-zone deployments are grouped together removing the "zone-X" suffix. 
- # This recording rule is made compatible with the breaking changes introduced in kube-state-metrics v2 - # that remove resource metrics, ref: - # - https://github.com/kubernetes/kube-state-metrics/blob/master/CHANGELOG.md#v200-alpha--2020-09-16 - # - https://github.com/kubernetes/kube-state-metrics/pull/1004 - # - # This is the old expression, compatible with kube-state-metrics < v2.0.0, - # where kube_pod_container_resource_requests_cpu_cores was removed: - ( - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - kube_pod_container_resource_requests_cpu_cores, - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - ) - or - # This expression is compatible with kube-state-metrics >= v1.4.0, - # where kube_pod_container_resource_requests was introduced. - ( - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - kube_pod_container_resource_requests{resource="cpu"}, - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - ) - record: cluster_namespace_deployment:kube_pod_container_resource_requests_cpu_cores:sum - - expr: | - # Jobs should be sized to their CPU usage. - # We do this by comparing 99th percentile usage over the last 24hrs to - # their current provisioned #replicas and resource requests. 
- ceil( - cluster_namespace_deployment:actual_replicas:count - * - quantile_over_time(0.99, cluster_namespace_deployment:container_cpu_usage_seconds_total:sum_rate[24h]) - / - cluster_namespace_deployment:kube_pod_container_resource_requests_cpu_cores:sum - ) - labels: - reason: cpu_usage - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - # Convenience rule to get the Memory utilization for both a deployment and a statefulset. - # Multi-zone deployments are grouped together removing the "zone-X" suffix. - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - container_memory_usage_bytes{image!=""}, - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - record: cluster_namespace_deployment:container_memory_usage_bytes:sum - - expr: | - # Convenience rule to get the Memory request for both a deployment and a statefulset. - # Multi-zone deployments are grouped together removing the "zone-X" suffix. 
- # This recording rule is made compatible with the breaking changes introduced in kube-state-metrics v2 - # that remove resource metrics, ref: - # - https://github.com/kubernetes/kube-state-metrics/blob/master/CHANGELOG.md#v200-alpha--2020-09-16 - # - https://github.com/kubernetes/kube-state-metrics/pull/1004 - # - # This is the old expression, compatible with kube-state-metrics < v2.0.0, - # where kube_pod_container_resource_requests_memory_bytes was removed: - ( - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - kube_pod_container_resource_requests_memory_bytes, - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - ) - or - # This expression is compatible with kube-state-metrics >= v1.4.0, - # where kube_pod_container_resource_requests was introduced. - ( - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - kube_pod_container_resource_requests{resource="memory"}, - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - ) - record: cluster_namespace_deployment:kube_pod_container_resource_requests_memory_bytes:sum - - expr: | - # Jobs should be sized to their Memory usage. - # We do this by comparing 99th percentile usage over the last 24hrs to - # their current provisioned #replicas and resource requests. 
- ceil( - cluster_namespace_deployment:actual_replicas:count - * - quantile_over_time(0.99, cluster_namespace_deployment:container_memory_usage_bytes:sum[24h]) - / - cluster_namespace_deployment:kube_pod_container_resource_requests_memory_bytes:sum - ) - labels: - reason: memory_usage - record: cluster_namespace_deployment_reason:required_replicas:count - - name: mimir_alertmanager_rules - rules: - - expr: | - sum by (cluster, job, pod) (cortex_alertmanager_alerts) - record: cluster_job_pod:cortex_alertmanager_alerts:sum - - expr: | - sum by (cluster, job, pod) (cortex_alertmanager_silences) - record: cluster_job_pod:cortex_alertmanager_silences:sum - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_alerts_received_total[5m])) - record: cluster_job:cortex_alertmanager_alerts_received_total:rate5m - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_alerts_invalid_total[5m])) - record: cluster_job:cortex_alertmanager_alerts_invalid_total:rate5m - - expr: | - sum by (cluster, job, integration) (rate(cortex_alertmanager_notifications_total[5m])) - record: cluster_job_integration:cortex_alertmanager_notifications_total:rate5m - - expr: | - sum by (cluster, job, integration) (rate(cortex_alertmanager_notifications_failed_total[5m])) - record: cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_state_replication_total[5m])) - record: cluster_job:cortex_alertmanager_state_replication_total:rate5m - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_state_replication_failed_total[5m])) - record: cluster_job:cortex_alertmanager_state_replication_failed_total:rate5m - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_partial_state_merges_total[5m])) - record: cluster_job:cortex_alertmanager_partial_state_merges_total:rate5m - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_partial_state_merges_failed_total[5m])) - record: 
cluster_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m - - name: mimir_ingester_rules - rules: - - expr: | - sum by(cluster, namespace, pod) (rate(cortex_ingester_ingested_samples_total[1m])) - record: cluster_namespace_pod:cortex_ingester_ingested_samples_total:rate1m + endpoints: + - honorLabels: true + port: http-metrics + scheme: http + selector: + matchLabels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent --- apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor @@ -47510,3 +3437,37 @@ spec: app.kubernetes.io/component: store-gateway app.kubernetes.io/instance: mimir-distributed app.kubernetes.io/name: mimir +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +spec: + rules: + - host: grafana-agent.localhost + http: + paths: + - backend: + service: + name: grafana-agent + port: + number: 80 + path: / + pathType: Prefix + - host: agent.localhost + http: + paths: + - backend: + service: + name: grafana-agent + port: + number: 80 + path: / + pathType: Prefix diff --git a/kubernetes/microservices-mode/metrics/kustomization.yaml b/kubernetes/microservices-mode/metrics/kustomization.yaml index 7dc7872d..30f3e7f5 100644 --- a/kubernetes/microservices-mode/metrics/kustomization.yaml +++ b/kubernetes/microservices-mode/metrics/kustomization.yaml @@ -8,10 +8,9 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: +- ../../common/grafana-agent - mimir-distributed -- ../../../monitoring-mixins/agent-flow-mixin/deploy -- ../../../monitoring-mixins/go-runtime-mixin/deploy -- ../../../monitoring-mixins/mimir-mixin/deploy + secretGenerator: - name: mimir-distributed-env @@ -22,19 +21,10 @@ secretGenerator: configMapGenerator: 
- name: agent-config namespace: monitoring-system - options: - disableNameSuffixHash: true + behavior: replace files: - configs/config.river -- name: grafana-datasources - namespace: monitoring-system - options: - labels: - grafana_datasource: "1" - files: - - datasources.yaml=configs/grafana-datasources-mimir.yaml - - name: mimir-distributed-config namespace: monitoring-system behavior: replace diff --git a/kubernetes/microservices-mode/profiles/configs/config.river b/kubernetes/microservices-mode/profiles/configs/config.river index 3c031bf3..0e2632e2 100644 --- a/kubernetes/microservices-mode/profiles/configs/config.river +++ b/kubernetes/microservices-mode/profiles/configs/config.river @@ -8,6 +8,9 @@ logging { format = "logfmt" } +/******************************************** + * Grafana LGTMP Stack Receiver Provider + ********************************************/ module.file "lgtmp" { filename = coalesce(env("AGENT_CONFIG_FOLDER"), "/etc/agent-modules") + "/lgtmp.river" @@ -31,3 +34,28 @@ module.file "profiles_primary" { clustering = true } } + +/******************************************** + * Metrics + ********************************************/ +module.file "metrics_primary" { + filename = coalesce(env("AGENT_CONFIG_FOLDER"), "/etc/agent-modules") + "/metrics.river" + + arguments { + forward_to = [module.file.lgtmp.exports.metrics_receiver] + clustering = true + } +} + +/******************************************** + * Agent Integrations + ********************************************/ +module.file "agent_integrations" { + filename = coalesce(env("AGENT_CONFIG_FOLDER"), "/etc/agent-modules") + "/integrations.river" + + arguments { + name = "agent-integrations" + namespace = "monitoring-system" + forward_to = [module.file.lgtmp.exports.metrics_receiver] + } +} diff --git a/kubernetes/microservices-mode/profiles/configs/grafana-datasources-pyroscope.yaml b/kubernetes/microservices-mode/profiles/configs/grafana-datasources-pyroscope.yaml deleted file mode 
100644 index 0ff4f5e1..00000000 --- a/kubernetes/microservices-mode/profiles/configs/grafana-datasources-pyroscope.yaml +++ /dev/null @@ -1,17 +0,0 @@ -apiVersion: 1 - -deleteDatasources: -- name: Profiles - uid: profiles - -datasources: -# Pyroscope for profiles -- name: Profiles - type: grafana-pyroscope-datasource - uid: profiles - access: proxy - url: http://nginx.gateway.svc.cluster.local:4040 - basicAuth: false - isDefault: true - version: 1 - editable: true diff --git a/kubernetes/microservices-mode/profiles/k8s-all-in-one.yaml b/kubernetes/microservices-mode/profiles/k8s-all-in-one.yaml index 6b6cfd81..95986388 100644 --- a/kubernetes/microservices-mode/profiles/k8s-all-in-one.yaml +++ b/kubernetes/microservices-mode/profiles/k8s-all-in-one.yaml @@ -5,6 +5,30 @@ metadata: --- apiVersion: v1 kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/version: 2.11.0 + name: mimir + namespace: monitoring-system +--- +apiVersion: v1 +kind: ServiceAccount metadata: labels: app.kubernetes.io/instance: pyroscope @@ -42,6 +66,108 @@ rules: - get --- apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent +rules: +- apiGroups: + - "" + - discovery.k8s.io + - networking.k8s.io + resources: + - endpoints + - endpointslices + - ingresses + - nodes + - 
nodes/proxy + - nodes/metrics + - pods + - services + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - pods + - pods/log + - namespaces + verbs: + - get + - list + - watch +- apiGroups: + - monitoring.grafana.com + resources: + - podlogs + verbs: + - get + - list + - watch +- apiGroups: + - monitoring.coreos.com + resources: + - prometheusrules + verbs: + - get + - list + - watch +- nonResourceURLs: + - /metrics + verbs: + - get +- apiGroups: + - monitoring.coreos.com + resources: + - podmonitors + - servicemonitors + - probes + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - events + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - configmaps + - secrets + verbs: + - get + - list + - watch +- apiGroups: + - apps + resources: + - replicasets + verbs: + - get + - list + - watch +- apiGroups: + - extensions + resources: + - replicasets + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: labels: @@ -61,6 +187,25 @@ subjects: name: pyroscope namespace: profiles-system --- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: grafana-agent +subjects: +- kind: ServiceAccount + name: grafana-agent + namespace: monitoring-system +--- apiVersion: v1 data: PYROSCOPE_DISTRIBUTOR_HOST: pyroscope-distributor.profiles-system.svc.cluster.local @@ -75,9 +220,11 @@ data: config.river: "/*\nThe following example shows using the default all logs processing module, for\na single tenant and specifying the destination url/credentials via environment\nvariables.\n*/\nlogging {\n\tlevel = coalesce(env(\"AGENT_LOG_LEVEL\"), - 
\"info\")\n\tformat = \"logfmt\"\n}\n\nmodule.file \"lgtmp\" {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), - \"/etc/agent-modules\") + \"/lgtmp.river\"\n\n\targuments {\n\t\tcluster = - coalesce(env(\"CLUSTER\"), \"k3d-k3s-codelab\")\n\t\tlogs_endpoint = coalesce(env(\"LOGS_ENDPOINT\"), + \"info\")\n\tformat = \"logfmt\"\n}\n\n/********************************************\n + * Grafana LGTMP Stack Receiver Provider\n ********************************************/\nmodule.file + \"lgtmp\" {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), \"/etc/agent-modules\") + + \"/lgtmp.river\"\n\n\targuments {\n\t\tcluster = coalesce(env(\"CLUSTER\"), + \"k3d-k3s-codelab\")\n\t\tlogs_endpoint = coalesce(env(\"LOGS_ENDPOINT\"), \"http://nginx.gateway.svc:3100\")\n\t\tmetrics_endpoint = coalesce(env(\"METRICS_ENDPOINT\"), \"http://nginx.gateway.svc:8080\")\n\t\tprofiles_endpoint = coalesce(env(\"PROFILES_ENDPOINT\"), \"http://nginx.gateway.svc:4040\")\n\t\ttraces_endpoint = coalesce(env(\"TRACES_ENDPOINT\"), @@ -85,37 +232,637 @@ data: * Profiles\n ********************************************/\nmodule.file \"profiles_primary\" {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), \"/etc/agent-modules\") + \"/profiles.river\"\n\n\targuments {\n\t\tforward_to = [module.file.lgtmp.exports.profiles_receiver]\n\t\tclustering - = true\n\t}\n}\n" + = true\n\t}\n}\n\n/********************************************\n * Metrics\n + ********************************************/\nmodule.file \"metrics_primary\" + {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), \"/etc/agent-modules\") + + \"/metrics.river\"\n\n\targuments {\n\t\tforward_to = [module.file.lgtmp.exports.metrics_receiver]\n\t\tclustering + = true\n\t}\n}\n\n/********************************************\n * Agent Integrations\n + ********************************************/\nmodule.file \"agent_integrations\" + {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), \"/etc/agent-modules\") + + 
\"/integrations.river\"\n\n\targuments {\n\t\tname = \"agent-integrations\"\n\t\tnamespace + \ = \"monitoring-system\"\n\t\tforward_to = [module.file.lgtmp.exports.metrics_receiver]\n\t}\n}\n" +kind: ConfigMap +metadata: + name: agent-config-52gfhcfbb4 + namespace: monitoring-system +--- +apiVersion: v1 +data: + MEMCACHED_SECRET_NAME: integrations-memcached + MYSQL_SECRET_NAME: integrations-mysql + REDIS_SECRET_NAME: integrations-redis + memcached.river: "/*\nModule: Memcached integrations\nDescription: Wrapper module + to integration Memcached metrics\n*/\nargument \"clustering\" {\n\t// comment + = \"Whether or not clustering should be enabled\"\n\toptional = true\n\tdefault + \ = true\n}\n\nargument \"name\" {\n\t// comment = \"Name of the secret for Memcached\"\n\toptional + = true\n\tdefault = \"integrations-memcached\"\n}\n\nargument \"namespace\" {\n\t// + comment = \"Namespace of the Memcached secret Integrations\"\n\toptional = true\n\tdefault + \ = \"default\"\n}\n\nargument \"instance\" {\n\t// comment = \"Instance of the + Memcached\"\n\toptional = true\n\tdefault = \"primary\"\n}\n\nargument \"forward_to\" + { }\n\nremote.kubernetes.secret \"memcached\" {\n\tname = argument.name.value\n\tnamespace + = argument.namespace.value\n}\n\n// Metrics\nprometheus.exporter.memcached \"integrations_memcached\" + {\n\taddress = nonsensitive(remote.kubernetes.secret.memcached.data[\"memcached-address\"])\n\ttimeout + = \"5s\"\n}\n\nprometheus.scrape \"memcached\" {\n\tclustering {\n\t\tenabled + = argument.clustering.value\n\t}\n\n\tenable_protobuf_negotiation = true\n\tscrape_classic_histograms + \ = true\n\n\ttargets = concat(\n\t\tprometheus.exporter.memcached.integrations_memcached.targets,\n\t)\n\tjob_name + \ = \"integrations/memcached\"\n\tforward_to = [prometheus.relabel.integrations_memcached.receiver]\n}\n\nprometheus.relabel + \"integrations_memcached\" {\n\trule {\n\t\treplacement = argument.instance.value\n\t\ttarget_label + = 
\"instance\"\n\t}\n\tforward_to = argument.forward_to.value\n}\n" + mysql.river: "/*\nModule: Mysql integrations\nDescription: Wrapper module to integration + mysql metrics\n*/\nargument \"clustering\" {\n\t// comment = \"Whether or not + clustering should be enabled\"\n\toptional = true\n\tdefault = true\n}\n\nargument + \"name\" {\n\t// comment = \"Name of the secret for MySQL\"\n\toptional = true\n\tdefault + \ = \"integrations-mysql\"\n}\n\nargument \"namespace\" {\n\t// comment = \"Namespace + of the MySQL secret Integrations\"\n\toptional = true\n\tdefault = \"default\"\n}\n\nargument + \"instance\" {\n\t// comment = \"Instance of the Database\"\n\toptional = true\n\tdefault + \ = \"primary\"\n}\n\nargument \"forward_to\" { }\n\nremote.kubernetes.secret + \"mysql\" {\n\tname = argument.name.value\n\tnamespace = argument.namespace.value\n}\n\n// + Metrics\nprometheus.exporter.mysql \"integrations_mysql\" {\n\tdata_source_name + = nonsensitive(remote.kubernetes.secret.mysql.data[\"mysql-username\"]) + \":\" + + nonsensitive(remote.kubernetes.secret.mysql.data[\"mysql-password\"]) + \"@(\" + + nonsensitive(remote.kubernetes.secret.mysql.data[\"mysql-host\"]) + \")/\"\n}\n\nprometheus.scrape + \"mysql\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\tenable_protobuf_negotiation + = true\n\tscrape_classic_histograms = true\n\n\ttargets = concat(\n\t\tprometheus.exporter.mysql.integrations_mysql.targets,\n\t)\n\tjob_name + \ = \"integrations/mysql\"\n\tforward_to = [prometheus.relabel.integrations_mysql.receiver]\n}\n\nprometheus.relabel + \"integrations_mysql\" {\n\trule {\n\t\treplacement = argument.instance.value\n\t\ttarget_label + = \"instance\"\n\t}\n\tforward_to = argument.forward_to.value\n}\n" + redis.river: "/*\nModule: Redis integrations\nDescription: Wrapper module to integration + Redis metrics\n*/\nargument \"clustering\" {\n\t// comment = \"Whether or not + clustering should be enabled\"\n\toptional = true\n\tdefault = 
true\n}\n\nargument + \"namespace\" {\n\t// comment = \"Namespace of the Redis secret Integrations\"\n\toptional + = true\n\tdefault = \"default\"\n}\n\nargument \"name\" {\n\t// comment = \"Name + of the secret for Redis\"\n\toptional = true\n\tdefault = \"integrations-redis\"\n}\n\nargument + \"instance\" {\n\t// comment = \"Instance of the Redis\"\n\toptional = true\n\tdefault + \ = \"master\"\n}\n\nargument \"forward_to\" { }\n\nremote.kubernetes.secret \"redis\" + {\n\tname = argument.name.value\n\tnamespace = argument.namespace.value\n}\n\n// + Metrics\nprometheus.exporter.redis \"integrations_redis\" {\n\tredis_addr = + nonsensitive(remote.kubernetes.secret.redis.data[\"redis-addr\"])\n\tredis_password + = nonsensitive(remote.kubernetes.secret.redis.data[\"redis-password\"])\n}\n\nprometheus.scrape + \"redis\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\tenable_protobuf_negotiation + = true\n\tscrape_classic_histograms = true\n\n\ttargets = concat(\n\t\tprometheus.exporter.redis.integrations_redis.targets,\n\t)\n\tjob_name + \ = \"integrations/redis\"\n\tforward_to = [prometheus.relabel.integrations_redis.receiver]\n}\n\nprometheus.relabel + \"integrations_redis\" {\n\trule {\n\t\treplacement = argument.instance.value\n\t\ttarget_label + = \"instance\"\n\t}\n\tforward_to = argument.forward_to.value\n}\n" kind: ConfigMap metadata: - name: agent-config + name: agent-integrations namespace: monitoring-system --- apiVersion: v1 data: - datasources.yaml: | - apiVersion: 1 + integrations.river: "/*\nModule: Agent integrations\nDescription: Wrapper module + to include auto loading integrations\n*/\nargument \"name\" {\n\t// comment = + \"Name of the integrations config\"\n\toptional = true\n\tdefault = \"agent-integrations\"\n}\n\nargument + \"namespace\" {\n\t// comment = \"Namespace of the integrations config\"\n\toptional + = true\n\tdefault = \"default\"\n}\n\nargument \"forward_to\" { }\n\nremote.kubernetes.configmap + \"integrations\" 
{\n\tname = argument.name.value\n\tnamespace = argument.namespace.value\n}\n\n/********************************************\n + * Integrations Mysql\n ********************************************/\nmodule.string + \"mysql\" {\n\tcontent = remote.kubernetes.configmap.integrations.data[\"mysql.river\"]\n\n\targuments + {\n\t\tnamespace = argument.namespace.value\n\t\tname = remote.kubernetes.configmap.integrations.data[\"MYSQL_SECRET_NAME\"]\n\t\tinstance + \ = \"primary\"\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n\n/********************************************\n + * Integrations Memcached\n ********************************************/\nmodule.string + \"memcached\" {\n\tcontent = remote.kubernetes.configmap.integrations.data[\"memcached.river\"]\n\n\targuments + {\n\t\tnamespace = argument.namespace.value\n\t\tname = remote.kubernetes.configmap.integrations.data[\"MEMCACHED_SECRET_NAME\"]\n\t\tinstance + \ = \"primary\"\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n\n/********************************************\n + * Integrations Redis\n ********************************************/\nmodule.string + \"redis\" {\n\tcontent = remote.kubernetes.configmap.integrations.data[\"redis.river\"]\n\n\targuments + {\n\t\tnamespace = argument.namespace.value\n\t\tname = remote.kubernetes.configmap.integrations.data[\"REDIS_SECRET_NAME\"]\n\t\tinstance + \ = \"master\"\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n" + lgtmp.river: "/********************************************\n * ARGUMENTS\n ********************************************/\nargument + \"cluster\" {\n\toptional = true\n\tdefault = \"k3d-k3s-codelab\"\n}\n\nargument + \"tenant\" {\n\toptional = true\n\tdefault = \"anonymous\"\n}\n\nargument \"metrics_endpoint\" + {\n\toptional = true\n\tdefault = \"http://mimir:8080\"\n\t//comment = \"Where + to send collected metrics.\"\n}\n\nargument \"logs_endpoint\" {\n\toptional = + true\n\tdefault = \"http://loki:3100\"\n\t//comment = \"Where to send 
collected + logs.\"\n}\n\nargument \"traces_endpoint\" {\n\toptional = true\n\tdefault = + \"tempo:4317\"\n\t//comment = \"Where to send collected traces.\"\n}\n\nargument + \"profiles_endpoint\" {\n\toptional = true\n\tdefault = \"http://pyroscope:4040\"\n\t//comment + \ = \"Where to send collected profiles.\"\n}\n\n/********************************************\n + * EXPORTS\n ********************************************/\n\nexport \"metrics_receiver\" + {\n\tvalue = prometheus.remote_write.mimir.receiver\n}\n\nexport \"logs_receiver\" + {\n\tvalue = loki.write.loki.receiver\n}\n\nexport \"traces_receiver\" {\n\tvalue + = otelcol.exporter.otlp.tempo.input\n}\n\nexport \"profiles_receiver\" {\n\tvalue + = pyroscope.write.pyroscope.receiver\n}\n\n/********************************************\n + * Endpoints\n ********************************************/\n\n// Metrics\nprometheus.remote_write + \"mimir\" {\n\tendpoint {\n\t\turl = argument.metrics_endpoint.value + + \"/api/v1/push\"\n\t\tsend_native_histograms = true\n\t}\n\n\texternal_labels + = {\n\t\t\"scraped_by\" = \"grafana-agent\",\n\t\t\"cluster\" = argument.cluster.value,\n\t}\n}\n\n// + Logs\nloki.write \"loki\" {\n\tendpoint {\n\t\turl = argument.logs_endpoint.value + + \"/loki/api/v1/push\"\n\t\ttenant_id = argument.tenant.value\n\t}\n\n\texternal_labels + = {\n\t\t\"scraped_by\" = \"grafana-agent\",\n\t\t\"cluster\" = argument.cluster.value,\n\t}\n}\n\n// + Traces\notelcol.exporter.otlp \"tempo\" {\n\tclient {\n\t\tendpoint = argument.traces_endpoint.value\n\n\t\ttls + {\n\t\t\tinsecure = true\n\t\t\tinsecure_skip_verify = true\n\t\t}\n\t}\n}\n\n// + Profiles\npyroscope.write \"pyroscope\" {\n\tendpoint {\n\t\turl = argument.profiles_endpoint.value\n\t}\n\n\texternal_labels + = {\n\t\t\"scraped_by\" = \"grafana-agent\",\n\t\t\"cluster\" = argument.cluster.value,\n\t}\n}\n" + logs.river: "/*\nModule: logs\nDescription: Wrapper module to include all kubernetes + logging modules and use cri 
parsing\n*/\nargument \"forward_to\" {\n\t// comment + = \"Must be a list(LogsReceiver) where collected logs should be forwarded to\"\n\toptional + = false\n}\n\nargument \"tenant\" {\n\t// comment = \"The tenant to filter logs + to. This does not have to be the tenantId, this is the value to look for in the + logs.agent.grafana.com/tenant annotation, and this can be a regex.\"\n\toptional + = true\n\tdefault = \".*\"\n}\n\nargument \"keep_labels\" {\n\t// comment = \"List + of labels to keep before the log message is written to Loki\"\n\toptional = true\n\tdefault + \ = [\n\t\t\"app\",\n\t\t\"cluster\",\n\t\t\"component\",\n\t\t\"container\",\n\t\t\"deployment\",\n\t\t\"env\",\n\t\t\"filename\",\n\t\t\"instance\",\n\t\t\"job\",\n\t\t\"level\",\n\t\t\"log_type\",\n\t\t\"namespace\",\n\t\t\"region\",\n\t\t\"service\",\n\t\t\"squad\",\n\t\t\"team\",\n\t]\n}\n\nargument + \"git_repo\" {\n\toptional = true\n\tdefault = coalesce(env(\"GIT_REPO\"), \"https://github.com/grafana/agent-modules.git\")\n}\n\nargument + \"git_rev\" {\n\toptional = true\n\tdefault = coalesce(env(\"GIT_REV\"), env(\"GIT_REVISION\"), + env(\"GIT_BRANCH\"), \"main\")\n}\n\nargument \"git_pull_freq\" {\n\t// comment + = \"How often to pull the git repo, the default is 0s which means never pull\"\n\toptional + = true\n\tdefault = \"0s\"\n}\n\nmodule.git \"log_targets\" {\n\trepository = + argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/targets/logs-from-worker.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.log_formats_all.exports.process.receiver]\n\t\ttenant + \ = argument.tenant.value\n\t\tgit_repo = argument.git_repo.value\n\t\tgit_rev + \ = argument.git_rev.value\n\t\tgit_pull_freq = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git + \"log_formats_all\" {\n\trepository = argument.git_repo.value\n\trevision + \ = argument.git_rev.value\n\tpull_frequency = 
argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/log-formats/all.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.log_level_default.exports.process.receiver]\n\t\tgit_repo + \ = argument.git_repo.value\n\t\tgit_rev = argument.git_rev.value\n\t\tgit_pull_freq + = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git \"log_level_default\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/labels/log-level.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.label_normalize_filename.exports.process.receiver]\n\t}\n}\n\nmodule.git + \"label_normalize_filename\" {\n\trepository = argument.git_repo.value\n\trevision + \ = argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/labels/normalize-filename.river\"\n\n\targuments + {\n\t\t// here we fork, one branch goes to the log level module, the other goes + to the metrics module\n\t\t// this is because we need to reduce the labels on + the pre-metrics but they are still necessary in\n\t\t// downstream modules\n\t\tforward_to + = [\n\t\t\tmodule.git.pre_process_metrics.exports.process.receiver,\n\t\t\tmodule.git.drop_levels.exports.process.receiver,\n\t\t]\n\t}\n}\n\nmodule.git + \"pre_process_metrics\" {\n\trepository = argument.git_repo.value\n\trevision + \ = argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/metrics/pre-process-bytes-lines.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.drop_levels.exports.process.receiver]\n\t\tkeep_labels + = argument.keep_labels.value\n\t}\n}\n\nmodule.git \"drop_levels\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/drops/levels.river\"\n\n\targuments + {\n\t\tforward_to = 
[module.git.scrub_all.exports.process.receiver]\n\t\tgit_repo + \ = argument.git_repo.value\n\t\tgit_rev = argument.git_rev.value\n\t\tgit_pull_freq + = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git \"scrub_all\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/scrubs/all.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.embed_pod.exports.process.receiver]\n\t\tgit_repo + \ = argument.git_repo.value\n\t\tgit_rev = argument.git_rev.value\n\t\tgit_pull_freq + = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git \"embed_pod\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/embed/pod.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.mask_all.exports.process.receiver]\n\t}\n}\n\nmodule.git + \"mask_all\" {\n\trepository = argument.git_repo.value\n\trevision = + argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/masks/all.river\"\n\n\targuments {\n\t\tforward_to + \ = [module.git.label_keep.exports.process.receiver]\n\t\tgit_repo = argument.git_repo.value\n\t\tgit_rev + \ = argument.git_rev.value\n\t\tgit_pull_freq = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git + \"label_keep\" {\n\trepository = argument.git_repo.value\n\trevision = + argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/labels/keep-labels.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.post_process_metrics.exports.process.receiver]\n\t\tkeep_labels + = argument.keep_labels.value\n\t}\n}\n\nmodule.git \"post_process_metrics\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = 
\"modules/kubernetes/logs/metrics/post-process-bytes-lines.river\"\n\n\targuments + {\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n" + metrics.river: "/*\nModule: metrics-all\nDescription: Wrapper module to include + all kubernetes metric modules and use cri parsing\n*/\nargument \"forward_to\" + {\n\t// comment = \"Must be a list(MetricssReceiver) where collected logs should + be forwarded to\"\n\toptional = false\n}\n\nargument \"clustering\" {\n\t// comment + = \"Whether or not clustering should be enabled\"\n\toptional = true\n\tdefault + \ = false\n}\n\n/********************************************\n * Kubernetes Auto + Scrape ServiceMonitor\n ********************************************/\nprometheus.operator.servicemonitors + \"auto_scrape_servicemonitors\" {\n\tforward_to = argument.forward_to.value\n\n\tclustering + {\n\t\tenabled = argument.clustering.value\n\t}\n}\n\n/********************************************\n + * Kubernetes Auto Scrape PodMonitors\n ********************************************/\nprometheus.operator.podmonitors + \"auto_scrape_podmonitors\" {\n\tforward_to = argument.forward_to.value\n\n\tclustering + {\n\t\tenabled = argument.clustering.value\n\t}\n\n\tselector {\n\t\tmatch_expression + {\n\t\t\tkey = \"team\"\n\t\t\toperator = \"In\"\n\t\t\tvalues = [\"team-infra\"]\n\t\t}\n\t}\n}\n\n/********************************************\n + * Kubernetes Prometheus Rules To Mimir\n ********************************************/\nmimir.rules.kubernetes + \"prometheus_rules_to_mimir\" {\n\taddress = coalesce(env(\"METRICS_ENDPOINT\"), + \"http://nginx.gateway.svc:8080\")\n\ttenant_id = \"anonymous\"\n}\n" + profiles.river: "/*\nModule: profiles\nDescription: Wrapper module to include all + kubernetes profile modules and use cri parsing\n*/\nargument \"forward_to\" {\n\t// + comment = \"Must be a list(ProfilessReceiver) where collected logs should be forwarded + to\"\n\toptional = false\n}\n\nargument \"clustering\" {\n\t// comment = 
\"Whether + or not clustering should be enabled\"\n\toptional = true\n\tdefault = false\n}\n\ndiscovery.kubernetes + \"pyroscope_kubernetes\" {\n\trole = \"pod\"\n}\n\n// The default scrape config + allows to define annotations based scraping.\n//\n// For example the following + annotations:\n//\n// ```\n// profiles.grafana.com/memory.scrape: \"true\"\n// + profiles.grafana.com/memory.port: \"8080\"\n// profiles.grafana.com/cpu.scrape: + \"true\"\n// profiles.grafana.com/cpu.port: \"8080\"\n// profiles.grafana.com/goroutine.scrape: + \"true\"\n// profiles.grafana.com/goroutine.port: \"8080\"\n// ```\n//\n// will + scrape the `memory`, `cpu` and `goroutine` profiles from the `8080` port of the + pod.\n//\n// For more information see https://grafana.com/docs/phlare/latest/operators-guide/deploy-kubernetes/#optional-scrape-your-own-workloads-profiles\ndiscovery.relabel + \"kubernetes_pods\" {\n\ttargets = concat(discovery.kubernetes.pyroscope_kubernetes.targets)\n\n\trule + {\n\t\taction = \"drop\"\n\t\tsource_labels = [\"__meta_kubernetes_pod_phase\"]\n\t\tregex + \ = \"Pending|Succeeded|Failed|Completed\"\n\t}\n\n\trule {\n\t\taction + = \"labelmap\"\n\t\tregex = \"__meta_kubernetes_pod_label_(.+)\"\n\t}\n\n\trule + {\n\t\taction = \"replace\"\n\t\tsource_labels = [\"__meta_kubernetes_namespace\"]\n\t\ttarget_label + \ = \"namespace\"\n\t}\n\n\trule {\n\t\taction = \"replace\"\n\t\tsource_labels + = [\"__meta_kubernetes_pod_name\"]\n\t\ttarget_label = \"pod\"\n\t}\n\n\trule + {\n\t\taction = \"replace\"\n\t\tsource_labels = [\"__meta_kubernetes_pod_container_name\"]\n\t\ttarget_label + \ = \"container\"\n\t}\n}\n\ndiscovery.relabel \"kubernetes_pods_memory_default_name\" + {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = 
[\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_memory_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = 
\"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Memory\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_memory\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_memory_default_name.output, discovery.relabel.kubernetes_pods_memory_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = true\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_cpu_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = 
\"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_cpu_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape CPU\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_cpu\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_cpu_default_name.output, 
discovery.relabel.kubernetes_pods_cpu_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_goroutine_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_goroutine_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule 
{\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Goroutine\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_goroutine\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_goroutine_default_name.output, + discovery.relabel.kubernetes_pods_goroutine_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + 
\"kubernetes_pods_block_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_block_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = 
\"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Block\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_block\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_block_default_name.output, discovery.relabel.kubernetes_pods_block_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_mutex_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = 
\"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_mutex_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = 
\"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Mutex\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_mutex\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_mutex_default_name.output, discovery.relabel.kubernetes_pods_mutex_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_fgprof_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = 
\"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_fgprof_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Fgprof\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_fgprof\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_fgprof_default_name.output, discovery.relabel.kubernetes_pods_fgprof_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = 
false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = true\n\t\t}\n\t}\n}\n" + traces.river: "/*\nModule: traces\n*/\n\n/********************************************\n + * ARGUMENTS\n ********************************************/\nargument \"traces_forward_to\" + {\n\toptional = false\n}\n\nargument \"logs_forward_to\" {\n\toptional = false\n}\n\nargument + \"metrics_forward_to\" {\n\toptional = false\n}\n\nargument \"cluster\" {\n\toptional + = true\n\tdefault = \"k3d-k3s-codelab\"\n}\n\nargument \"otlp_http_endpoint\" + {\n\toptional = true\n\tdefault = \"0.0.0.0:4318\"\n}\n\nargument \"otlp_grpc_endpoint\" + {\n\toptional = true\n\tdefault = \"0.0.0.0:4317\"\n}\n\n/********************************************\n + * EXPORTS\n ********************************************/\nexport \"agent_traces_input\" + {\n\tvalue = otelcol.processor.batch.default.input\n}\n\n/********************************************\n + * Jaeger for Metrics Logs Traces\n ********************************************/\n\notelcol.receiver.jaeger + \"default\" {\n\tprotocols {\n\t\tgrpc {\n\t\t\tendpoint = \"0.0.0.0:14250\"\n\t\t}\n\n\t\tthrift_http + {\n\t\t\tendpoint = \"0.0.0.0:14268\"\n\t\t}\n\n\t\tthrift_binary {\n\t\t\tendpoint + = \"0.0.0.0:6832\"\n\t\t}\n\n\t\tthrift_compact {\n\t\t\tendpoint = \"0.0.0.0:6831\"\n\t\t}\n\t}\n\n\toutput + {\n\t\tmetrics = [otelcol.processor.batch.default.input]\n\t\tlogs = [otelcol.processor.resourcedetection.default.input]\n\t\ttraces + \ = [otelcol.processor.resourcedetection.default.input]\n\t}\n}\n\n/********************************************\n + * Otelcol for Metrics Logs Traces\n ********************************************/\n// + https://grafana.com/docs/agent/latest/flow/reference/components/otelcol.receiver.otlp/\notelcol.receiver.otlp + \"default\" {\n\tgrpc {\n\t\tendpoint = 
argument.otlp_grpc_endpoint.value\n\t}\n\n\thttp + {\n\t\tendpoint = argument.otlp_http_endpoint.value\n\t}\n\n\toutput {\n\t\tmetrics + = [otelcol.processor.batch.default.input]\n\t\tlogs = [otelcol.processor.resourcedetection.default.input]\n\t\ttraces + \ = [\n\t\t\totelcol.processor.resourcedetection.default.input,\n\t\t\totelcol.connector.spanlogs.autologging.input,\n\t\t]\n\t}\n}\n\notelcol.processor.resourcedetection + \"default\" {\n\tdetectors = [\"env\"]\n\n\toutput {\n\t\tlogs = [otelcol.processor.k8sattributes.default.input]\n\t\ttraces + = [otelcol.processor.k8sattributes.default.input]\n\t}\n}\n\notelcol.processor.k8sattributes + \"default\" {\n\textract {\n\t\tmetadata = [\n\t\t\t\"k8s.namespace.name\",\n\t\t\t\"k8s.pod.name\",\n\t\t\t\"k8s.deployment.name\",\n\t\t\t\"k8s.statefulset.name\",\n\t\t\t\"k8s.daemonset.name\",\n\t\t\t\"k8s.cronjob.name\",\n\t\t\t\"k8s.job.name\",\n\t\t\t\"k8s.node.name\",\n\t\t\t\"k8s.pod.uid\",\n\t\t\t\"k8s.pod.start_time\",\n\t\t]\n\t}\n\n\tpod_association + {\n\t\tsource {\n\t\t\tfrom = \"connection\"\n\t\t}\n\t}\n\n\toutput {\n\t\tlogs + \ = [otelcol.processor.transform.add_resource_attributes.input]\n\t\ttraces = + [otelcol.processor.transform.add_resource_attributes.input]\n\t}\n}\n\notelcol.processor.transform + \"add_resource_attributes\" {\n\terror_mode = \"ignore\"\n\n\tlog_statements {\n\t\tcontext + \ = \"resource\"\n\t\tstatements = [\n\t\t\t`set(attributes[\"pod\"], attributes[\"k8s.pod.name\"])`,\n\t\t\t`set(attributes[\"namespace\"], + attributes[\"k8s.namespace.name\"])`,\n\t\t\t`set(attributes[\"loki.resource.labels\"], + \"pod, namespace, cluster, job\")`,\n\t\t\t`set(attributes[\"k8s.cluster.name\"], + \"k3d-k3s-codelab\") where attributes[\"k8s.cluster.name\"] == nil`,\n\t\t]\n\t}\n\n\ttrace_statements + {\n\t\tcontext = \"resource\"\n\t\tstatements = [\n\t\t\t`set(attributes[\"k8s.cluster.name\"], + \"k3d-k3s-codelab\") where attributes[\"k8s.cluster.name\"] == nil`,\n\t\t]\n\t}\n\n\toutput + 
{\n\t\tlogs = [otelcol.processor.filter.default.input]\n\t\ttraces = [otelcol.processor.filter.default.input]\n\t}\n}\n\notelcol.processor.filter + \"default\" {\n\terror_mode = \"ignore\"\n\n\toutput {\n\t\tlogs = [otelcol.processor.batch.default.input]\n\t\ttraces + = [otelcol.processor.batch.default.input]\n\t}\n}\n\notelcol.processor.batch \"default\" + {\n\tsend_batch_size = 16384\n\tsend_batch_max_size = 0\n\ttimeout = + \"5s\"\n\n\toutput {\n\t\tmetrics = [otelcol.processor.memory_limiter.default.input]\n\t\tlogs + \ = [otelcol.processor.memory_limiter.default.input]\n\t\ttraces = [otelcol.processor.memory_limiter.default.input]\n\t}\n}\n\notelcol.processor.memory_limiter + \"default\" {\n\tcheck_interval = \"1s\"\n\tlimit_percentage = 50\n\tspike_limit_percentage + = 30\n\n\toutput {\n\t\tmetrics = [otelcol.exporter.prometheus.tracesmetrics.input]\n\t\tlogs + \ = [otelcol.exporter.loki.traceslogs.input]\n\t\ttraces = argument.traces_forward_to.value\n\t}\n}\n\notelcol.exporter.prometheus + \"tracesmetrics\" {\n\tforward_to = argument.metrics_forward_to.value\n}\n\notelcol.exporter.loki + \"traceslogs\" {\n\tforward_to = [loki.process.traceslogs.receiver]\n}\n\n// The + OpenTelemetry spanlog connector processes incoming trace spans and extracts data + from them ready\n// for logging.\notelcol.connector.spanlogs \"autologging\" {\n\t// + We only want to output a line for each root span (ie. 
every single trace), and + not for every\n\t// process or span (outputting a line for every span would be + extremely verbose).\n\tspans = false\n\troots = true\n\tprocesses = false\n\n\t// + We want to ensure that the following three span attributes are included in the + log line, if present.\n\tspan_attributes = [\n\t\t\"http.method\",\n\t\t\"http.target\",\n\t\t\"http.status_code\",\n\t]\n\n\t// + Overrides the default key in the log line to be `traceId`, which is then used + by Grafana to\n\t// identify the trace ID for correlation with the Tempo datasource.\n\toverrides + {\n\t\ttrace_id_key = \"traceId\"\n\t}\n\n\t// Send to the OpenTelemetry Loki + exporter.\n\toutput {\n\t\tlogs = [otelcol.exporter.loki.autologging.input]\n\t}\n}\n\n// + Simply forwards the incoming OpenTelemetry log format out as a Loki log.\n// We + need this stage to ensure we can then process the logline as a Loki object.\notelcol.exporter.loki + \"autologging\" {\n\tforward_to = [loki.process.autologging.receiver]\n}\n\n// + The Loki processor allows us to accept a correctly formatted Loki log and mutate + it into\n// a set of fields for output.\nloki.process \"autologging\" {\n\t// + The JSON stage simply extracts the `body` (the actual logline) from the Loki log, + ignoring\n\t// all other fields.\n\tstage.json {\n\t\texpressions = {\"body\" + = \"\"}\n\t}\n\t// The output stage takes the body (the main logline) and uses + this as the source for the output\n\t// logline. 
In this case, it essentially + turns it into logfmt.\n\tstage.output {\n\t\tsource = \"body\"\n\t}\n\n\tforward_to + = [loki.process.traceslogs.receiver]\n}\n\nloki.process \"traceslogs\" {\n\tstage.tenant + {\n\t\tvalue = \"anonymous\"\n\t}\n\n\tforward_to = argument.logs_forward_to.value\n}\n" +kind: ConfigMap +metadata: + name: agent-modules-cf8t5bf7t9 + namespace: monitoring-system +--- +apiVersion: v1 +data: + alertmanager_fallback_config.yaml: | + route: + group_wait: 0s + receiver: empty-receiver + + receivers: + # In this example we're not going to send any notification out of Alertmanager. + - name: 'empty-receiver' + mimir.yaml: | + # Do not use this configuration in production. + # It is for demonstration purposes only. + multitenancy_enabled: false + + # -usage-stats.enabled=false + usage_stats: + enabled: false + + server: + http_listen_port: 8080 + grpc_listen_port: 9095 + log_level: info + + # https://grafana.com/docs/mimir/latest/references/configuration-parameters/#use-environment-variables-in-the-configuration + common: + storage: + backend: s3 + s3: + endpoint: ${MIMIR_S3_ENDPOINT:minio.minio-system.svc:443} + access_key_id: ${MIMIR_S3_ACCESS_KEY_ID:lgtmp} + secret_access_key: ${MIMIR_S3_SECRET_ACCESS_KEY:supersecret} + insecure: ${MIMIR_S3_INSECURE:false} + http: + insecure_skip_verify: true + + alertmanager: + data_dir: /data/alertmanager + enable_api: true + external_url: /alertmanager + fallback_config_file: /etc/mimir/alertmanager_fallback_config.yaml + alertmanager_storage: + s3: + bucket_name: mimir-alertmanager + + + memberlist: + join_members: [ mimir-memberlist:7946 ] + + ingester: + ring: + replication_factor: 1 + + store_gateway: + sharding_ring: + replication_factor: 1 - deleteDatasources: - - name: Profiles - uid: profiles - datasources: - # Pyroscope for profiles - - name: Profiles - type: grafana-pyroscope-datasource - uid: profiles - access: proxy - url: http://nginx.gateway.svc.cluster.local:4040 - basicAuth: false - isDefault: 
true - version: 1 - editable: true + blocks_storage: + s3: + bucket_name: mimir-blocks + tsdb: + dir: /data/ingester + ship_interval: 1m + block_ranges_period: [ 2h ] + retention_period: 3h + bucket_store: + index_cache: + backend: memcached + memcached: + addresses: dns+memcached.memcached-system.svc:11211 + + chunks_cache: + backend: memcached + memcached: + addresses: dns+memcached.memcached-system.svc:11211 + + metadata_cache: + backend: memcached + memcached: + addresses: dns+memcached.memcached-system.svc:11211 + + ruler: + rule_path: /data/rules + enable_api: true + alertmanager_url: http://localhost:8080/alertmanager + ruler_storage: + s3: + bucket_name: mimir-ruler + cache: + backend: memcached + memcached: + addresses: dns+memcached.memcached-system.svc:11211 + + compactor: + compaction_interval: 30s + data_dir: /data/mimir-compactor + cleanup_interval: 1m + tenant_cleanup_delay: 1m + + limits: + native_histograms_ingestion_enabled: true + + overrides_exporter: + ring: + enabled: true + wait_stability_min_duration: 30s + + runtime_config: + file: /etc/mimir/runtime.yaml + runtime.yaml: |- + # This file can be used to set overrides or other runtime config. 
+ ingester_limits: # limits that each ingester replica enforces + max_ingestion_rate: 20000 + max_series: 1500000 + max_tenants: 1000 + max_inflight_push_requests: 30000 + + distributor_limits: # limits that each distributor replica enforces + max_ingestion_rate: 20000 + max_inflight_push_requests: 30000 + max_inflight_push_requests_bytes: 50000000 + + overrides: + anonymous: # limits for anonymous that the whole cluster enforces + # ingestion_tenant_shard_size: 9 + max_global_series_per_user: 1500000 + max_fetched_series_per_query: 100000 + native_histograms_ingestion_enabled: true + ruler_max_rules_per_rule_group: 50 kind: ConfigMap metadata: labels: - grafana_datasource: "1" - name: grafana-datasources-t756b6d8cg + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/version: 2.11.0 + name: mimir-config-958c4gm5k9 namespace: monitoring-system --- apiVersion: v1 @@ -168,6 +915,51 @@ metadata: namespace: profiles-system --- apiVersion: v1 +data: + memcached-address: bWVtY2FjaGVkLm1lbWNhY2hlZC1zeXN0ZW0uc3ZjLmNsdXN0ZXIubG9jYWw6MTEyMTE= +kind: Secret +metadata: + name: integrations-memcached + namespace: monitoring-system +type: Opaque +--- +apiVersion: v1 +data: + mysql-host: bXlzcWwubXlzcWwtc3lzdGVtLnN2Yy5jbHVzdGVyLmxvY2Fs + mysql-password: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= + mysql-username: bGd0bXA= +kind: Secret +metadata: + name: integrations-mysql + namespace: monitoring-system +type: Opaque +--- +apiVersion: v1 +data: + redis-addr: cmVkaXMtbWFzdGVyLnJlZGlzLXN5c3RlbS5zdmMuY2x1c3Rlci5sb2NhbDo2Mzc5 + redis-password: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= +kind: Secret +metadata: + name: integrations-redis + namespace: monitoring-system +type: Opaque +--- +apiVersion: v1 +data: + MIMIR_S3_SECRET_ACCESS_KEY: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= +kind: Secret +metadata: + labels: + app.kubernetes.io/component: mimir 
+ app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/version: 2.11.0 + name: mimir-env-92ddctt858 + namespace: monitoring-system +type: Opaque +--- +apiVersion: v1 data: PYROSCOPE_STORAGE_S3_SECRET_ACCESS_KEY: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= kind: Secret @@ -178,6 +970,133 @@ type: Opaque --- apiVersion: v1 kind: Service +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +spec: + internalTrafficPolicy: Cluster + ports: + - name: http-metrics + port: 80 + protocol: TCP + targetPort: 80 + - name: grpc-otlp + port: 4317 + protocol: TCP + targetPort: 4317 + - name: http-otlp + port: 4318 + protocol: TCP + targetPort: 4318 + - name: zipkin + port: 9411 + protocol: TCP + targetPort: 9411 + - name: jaeger-compact + port: 6831 + protocol: UDP + targetPort: 6831 + selector: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + type: ClusterIP +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent-cluster + namespace: monitoring-system +spec: + clusterIP: None + ports: + - name: http + port: 80 + protocol: TCP + targetPort: 80 + - name: grpc-otlp + port: 4317 + protocol: TCP + targetPort: 4317 + - name: http-otlp + port: 4318 + protocol: TCP + targetPort: 4318 + - name: zipkin + port: 9411 + protocol: TCP + targetPort: 9411 + - name: jaeger-compact + port: 6831 + protocol: UDP + targetPort: 6831 + selector: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + type: ClusterIP 
+--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/version: 2.11.0 + name: mimir + namespace: monitoring-system +spec: + ports: + - name: http-metrics + port: 8080 + - name: grpc-distribut + port: 9095 + selector: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/name: mimir +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/version: 2.11.0 + prometheus.io/service-monitor: "false" + name: mimir-memberlist + namespace: monitoring-system +spec: + clusterIP: None + ports: + - appProtocol: tcp + name: tcp-gossip-ring + port: 7946 + protocol: TCP + targetPort: 7946 + publishNotReadyAddresses: true + selector: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/name: mimir + app.kubernetes.io/part-of: memberlist +--- +apiVersion: v1 +kind: Service metadata: labels: app.kubernetes.io/component: compactor @@ -545,6 +1464,92 @@ spec: --- apiVersion: apps/v1 kind: Deployment +metadata: + labels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/part-of: memberlist + app.kubernetes.io/version: 2.11.0 + name: mimir + namespace: monitoring-system +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/name: mimir + app.kubernetes.io/part-of: memberlist + template: + metadata: + annotations: + logs.agent.grafana.com/scrape: "true" + 
logs.agent.grafana.com/scrub-level: info + profiles.grafana.com/cpu.port_name: http-metrics + profiles.grafana.com/cpu.scrape: "false" + profiles.grafana.com/goroutine.port_name: http-metrics + profiles.grafana.com/goroutine.scrape: "false" + profiles.grafana.com/memory.port_name: http-metrics + profiles.grafana.com/memory.scrape: "false" + pyroscope.io/service_name: mimir + labels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/name: mimir + app.kubernetes.io/part-of: memberlist + spec: + containers: + - args: + - -target=all + - -config.expand-env=true + - -config.file=/etc/mimir/mimir.yaml + - -memberlist.bind-addr=$(POD_IP) + env: + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + envFrom: + - secretRef: + name: mimir-env-92ddctt858 + image: docker.io/grafana/mimir:2.11.0 + imagePullPolicy: IfNotPresent + name: mimir + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc-distribut + - containerPort: 7946 + name: http-memberlist + readinessProbe: + httpGet: + path: /ready + port: http-metrics + resources: + limits: + cpu: 999m + memory: 1Gi + requests: + cpu: 10m + memory: 55Mi + volumeMounts: + - mountPath: /etc/mimir + name: config + - mountPath: /data + name: storage + terminationGracePeriodSeconds: 60 + volumes: + - configMap: + name: mimir-config-958c4gm5k9 + name: config + - emptyDir: {} + name: storage +--- +apiVersion: apps/v1 +kind: Deployment metadata: labels: app.kubernetes.io/component: distributor @@ -1190,3 +2195,214 @@ spec: name: overrides-config - emptyDir: {} name: data +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +spec: + minReadySeconds: 10 + selector: + 
matchLabels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + template: + metadata: + annotations: + kubectl.kubernetes.io/default-container: grafana-agent + logs.agent.grafana.com/scrape: "true" + logs.agent.grafana.com/scrub-level: debug + profiles.grafana.com/cpu.port_name: http-metrics + profiles.grafana.com/cpu.scrape: "false" + profiles.grafana.com/goroutine.port_name: http-metrics + profiles.grafana.com/goroutine.scrape: "false" + profiles.grafana.com/memory.port_name: http-metrics + profiles.grafana.com/memory.scrape: "false" + pyroscope.io/service_name: grafana-agent + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + spec: + containers: + - args: + - run + - /etc/agent/config.river + - --storage.path=/tmp/agent + - --server.http.listen-addr=0.0.0.0:80 + - --server.http.ui-path-prefix=/ + - --disable-reporting + - --cluster.enabled=true + - --cluster.join-addresses=grafana-agent-cluster + env: + - name: AGENT_MODE + value: flow + - name: AGENT_DEPLOY_MODE + value: helm + - name: HOSTNAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + envFrom: + - secretRef: + name: agent-env + optional: true + image: docker.io/grafana/agent:v0.40.3 + imagePullPolicy: IfNotPresent + name: grafana-agent + ports: + - containerPort: 80 + name: http-metrics + - containerPort: 4317 + name: grpc-otlp + protocol: TCP + - containerPort: 4318 + name: http-otlp + protocol: TCP + - containerPort: 9411 + name: zipkin + protocol: TCP + - containerPort: 6831 + name: jaeger-compact + protocol: UDP + readinessProbe: + httpGet: + path: /-/ready + port: 80 + scheme: HTTP + initialDelaySeconds: 10 + timeoutSeconds: 1 + volumeMounts: + - mountPath: /etc/agent + name: config + - mountPath: /var/log + name: varlog + readOnly: true + - mountPath: /etc/agent-modules + name: agent-modules + - args: + - --volume-dir=/etc/agent + - --webhook-url=http://localhost:80/-/reload + image: 
ghcr.io/jimmidyson/configmap-reload:v0.12.0 + name: config-reloader + resources: + requests: + cpu: 1m + memory: 5Mi + volumeMounts: + - mountPath: /etc/agent + name: config + dnsPolicy: ClusterFirst + nodeSelector: + kubernetes.io/os: linux + serviceAccountName: grafana-agent + volumes: + - configMap: + name: agent-config-52gfhcfbb4 + name: config + - hostPath: + path: /var/log + name: varlog + - configMap: + name: agent-modules-cf8t5bf7t9 + name: agent-modules + updateStrategy: + rollingUpdate: + maxSurge: 0 + maxUnavailable: 2 + type: RollingUpdate +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +spec: + endpoints: + - honorLabels: true + port: http-metrics + scheme: http + selector: + matchLabels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/version: 2.11.0 + name: mimir + namespace: monitoring-system +spec: + endpoints: + - port: http-metrics + relabelings: + - replacement: monitoring-system/mimir + sourceLabels: + - job + targetLabel: job + scheme: http + namespaceSelector: + matchNames: + - monitoring-system + selector: + matchExpressions: + - key: prometheus.io/service-monitor + operator: NotIn + values: + - "false" + matchLabels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/name: mimir +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + 
app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +spec: + rules: + - host: grafana-agent.localhost + http: + paths: + - backend: + service: + name: grafana-agent + port: + number: 80 + path: / + pathType: Prefix + - host: agent.localhost + http: + paths: + - backend: + service: + name: grafana-agent + port: + number: 80 + path: / + pathType: Prefix diff --git a/kubernetes/microservices-mode/profiles/kustomization.yaml b/kubernetes/microservices-mode/profiles/kustomization.yaml index a82a8d35..e57b2c14 100644 --- a/kubernetes/microservices-mode/profiles/kustomization.yaml +++ b/kubernetes/microservices-mode/profiles/kustomization.yaml @@ -8,8 +8,13 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: +- ../../common/grafana-agent - pyroscope +# optional +- ../../monolithic-mode/metrics/mimir + + secretGenerator: - name: pyroscope-env namespace: profiles-system @@ -19,19 +24,10 @@ secretGenerator: configMapGenerator: - name: agent-config namespace: monitoring-system - options: - disableNameSuffixHash: true + behavior: replace files: - configs/config.river -- name: grafana-datasources - namespace: monitoring-system - options: - labels: - grafana_datasource: "1" - files: - - datasources.yaml=configs/grafana-datasources-pyroscope.yaml - - name: pyroscope-config namespace: profiles-system options: diff --git a/kubernetes/microservices-mode/traces/configs/config.river b/kubernetes/microservices-mode/traces/configs/config.river index 0fde764f..976a01a9 100644 --- a/kubernetes/microservices-mode/traces/configs/config.river +++ b/kubernetes/microservices-mode/traces/configs/config.river @@ -8,6 +8,9 @@ logging { format = "logfmt" } +/******************************************** + * Grafana LGTMP Stack Receiver Provider + ********************************************/ module.file "lgtmp" { filename = 
coalesce(env("AGENT_CONFIG_FOLDER"), "/etc/agent-modules") + "/lgtmp.river" @@ -34,11 +37,6 @@ module.file "traces_primary" { } } -tracing { - sampling_fraction = 0.8 - write_to = [module.file.traces_primary.exports.agent_traces_input] -} - /******************************************** * Metrics ********************************************/ diff --git a/kubernetes/microservices-mode/traces/configs/grafana-datasources-tempo.yaml b/kubernetes/microservices-mode/traces/configs/grafana-datasources-tempo.yaml deleted file mode 100644 index ebe3c764..00000000 --- a/kubernetes/microservices-mode/traces/configs/grafana-datasources-tempo.yaml +++ /dev/null @@ -1,62 +0,0 @@ -apiVersion: 1 - -deleteDatasources: -- name: Metrics - uid: metrics -- name: Traces - uid: traces - -datasources: -# Mimir for metrics -- name: Metrics - type: prometheus - uid: metrics - access: proxy - orgId: 1 - url: http://nginx.gateway.svc.cluster.local:8080/prometheus - basicAuth: false - isDefault: false - version: 1 - editable: true - jsonData: - prometheusType: Mimir - exemplarTraceIdDestinations: - - name: traceID - datasourceUid: traces - -# Tempo for traces -- name: Traces - type: tempo - access: proxy - uid: traces - url: http://nginx.gateway.svc.cluster.local:3200 - basicAuth: false - isDefault: true - version: 1 - editable: true - jsonData: - search: - hide: false - nodeGraph: - enabled: true - serviceMap: - datasourceUid: metrics - traceQuery: - timeShiftEnabled: true - spanStartTimeShift: '-30m' - spanEndTimeShift: '30m' - spanBar: - type: 'Tag' - tag: 'http.path' - tracesToMetrics: - datasourceUid: metrics - spanStartTimeShift: '-30m' - spanEndTimeShift: '30m' - tags: [{ key: 'service.name', value: 'service' }, { key: 'span_name' }, { key: 'http_method' }] - queries: - - name: '(R) Rate' - query: 'sum(rate(traces_spanmetrics_calls_total{$$__tags}[$$__rate_interval]))' - - name: '(E) Error Rate' - query: 'sum(rate(traces_spanmetrics_calls_total{$$__tags, 
status_code="STATUS_CODE_ERROR"}[$$__rate_interval]))' - - name: '(D) Duration' - query: 'histogram_quantile(0.9, sum(rate(traces_spanmetrics_latency_bucket{$$__tags}[$$__rate_interval])) by (le))' diff --git a/kubernetes/microservices-mode/traces/k8s-all-in-one.yaml b/kubernetes/microservices-mode/traces/k8s-all-in-one.yaml index 45f95f46..900fdf0e 100644 --- a/kubernetes/microservices-mode/traces/k8s-all-in-one.yaml +++ b/kubernetes/microservices-mode/traces/k8s-all-in-one.yaml @@ -5,6 +5,18 @@ metadata: --- apiVersion: v1 kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +--- +apiVersion: v1 +kind: ServiceAccount metadata: labels: app.kubernetes.io/component: mimir @@ -28,6 +40,127 @@ metadata: name: tempo-distributed namespace: tracing-system --- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent +rules: +- apiGroups: + - "" + - discovery.k8s.io + - networking.k8s.io + resources: + - endpoints + - endpointslices + - ingresses + - nodes + - nodes/proxy + - nodes/metrics + - pods + - services + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - pods + - pods/log + - namespaces + verbs: + - get + - list + - watch +- apiGroups: + - monitoring.grafana.com + resources: + - podlogs + verbs: + - get + - list + - watch +- apiGroups: + - monitoring.coreos.com + resources: + - prometheusrules + verbs: + - get + - list + - watch +- nonResourceURLs: + - /metrics + verbs: + - get +- apiGroups: + - monitoring.coreos.com + resources: + - podmonitors + - servicemonitors + - 
probes + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - events + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - configmaps + - secrets + verbs: + - get + - list + - watch +- apiGroups: + - apps + resources: + - replicasets + verbs: + - get + - list + - watch +- apiGroups: + - extensions + resources: + - replicasets + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: grafana-agent +subjects: +- kind: ServiceAccount + name: grafana-agent + namespace: monitoring-system +--- apiVersion: v1 data: TEMPO_COMPACTOR_HOST: tempo-distributed-compactor.tracing-system.svc.cluster.local @@ -88,865 +221,15 @@ metadata: namespace: gateway --- apiVersion: v1 -data: - agent-cluster-node.json: |- - { - "annotations": { - "list": [ - { - "datasource": "$loki_datasource", - "enable": true, - "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", - "iconColor": "rgba(0, 211, 255, 1)", - "instant": false, - "name": "Deployments", - "titleFormat": "{{cluster}}/{{namespace}}" - } - ] - }, - "graphTooltip": 1, - "links": [ - { - "icon": "doc", - "targetBlank": true, - "title": "Documentation", - "tooltip": "Clustering documentation", - "type": "link", - "url": "https://grafana.com/docs/agent/latest/flow/reference/cli/run/#clustered-mode-experimental" - }, - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - 
"panels": [ - { - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "title": "Node Info", - "type": "row" - }, - { - "datasource": "${datasource}", - "description": "Information about a specific cluster node.\n\n* Lamport clock time: The observed Lamport time on the specific node's clock used to provide partial ordering around gossip messages. Nodes should ideally be observing roughly the same time, meaning they are up-to-date on the cluster state. If a node is falling behind, it means that it has not recently processed the same number of messages and may have an outdated view of its peers.\n\n* Internal cluster state observers: The number of Observer functions that are registered to run whenever the node detects a cluster change.\n\n* Gossip health score: A health score assigned to this node by the memberlist implementation. The lower, the better.\n\n* Gossip protocol version: The protocol version used by nodes to communicate with one another. 
It should match across all nodes.\n", - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 1 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(cluster_node_lamport_time{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "Lamport clock time" - }, - { - "datasource": "${datasource}", - "expr": "sum(cluster_node_update_observers{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "Internal cluster state observers" - }, - { - "datasource": "${datasource}", - "expr": "sum(cluster_node_gossip_health_score{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "Gossip health score" - }, - { - "datasource": "${datasource}", - "expr": "sum(cluster_node_gossip_proto_version{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "Gossip protocol version" - } - ], - "title": "Node Info", - "transformations": [ - { - "id": "renameByRegex", - "options": { - "regex": "Value #(.*)", - "renamePattern": "$1" - } - }, - { - "id": "reduce", - "options": { } - }, - { - "id": "organize", - "options": { - "excludeByName": { }, - "indexByName": { }, - "renameByName": { - "Field": "Metric", - "Max": "Value" - } - } - } - ], - "type": "table" - }, - { - "datasource": "${datasource}", - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 1 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(cluster_node_gossip_received_events_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", - "instant": false, - "legendFormat": "{{event}}", - "range": 
true - } - ], - "title": "Gossip ops/s", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Known peers to the node (including the local node).\n", - "fieldConfig": { - "defaults": { - "unit": "suffix:peers" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 9 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(cluster_node_peers{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", - "instant": false, - "legendFormat": "__auto", - "range": true - } - ], - "title": "Known peers", - "type": "stat" - }, - { - "datasource": "${datasource}", - "description": "Known peers to the node by state (including the local node).\n", - "fieldConfig": { - "defaults": { - "unit": "suffix:nodes" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 9 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "cluster_node_peers{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}", - "instant": false, - "legendFormat": "{{state}}", - "range": true - } - ], - "title": "Peers by state", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 17 - }, - "title": "Gossip Transport", - "type": "row" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "axisCenteredZero": true - }, - "unit": "Bps" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 18 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(cluster_transport_rx_bytes_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", - "instant": false, - "legendFormat": "rx", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "-1 * rate(cluster_transport_tx_bytes_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", - "instant": false, - "legendFormat": "tx", - "range": true - } - ], - 
"title": "Transport bandwidth", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "unit": "percentunit" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 18 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "1 - (\nrate(cluster_transport_tx_packets_failed_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) /\nrate(cluster_transport_tx_packets_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n)\n", - "instant": false, - "legendFormat": "Tx success %", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "1 - (\n rate(cluster_transport_rx_packets_failed_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) /\n rate(cluster_transport_rx_packets_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n )\n", - "instant": false, - "legendFormat": "Rx success %", - "range": true - } - ], - "title": "Packet write success rate", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "The number of packets enqueued currently to be decoded or encoded and sent during communication with other nodes.\n\nThe incoming and outgoing packet queue should be as empty as possible; a growing queue means that the Agent cannot keep up with the number of messages required to have all nodes informed of cluster changes, and the nodes may not converge in a timely fashion.\n", - "fieldConfig": { - "defaults": { - "unit": "pkts" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 18 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "cluster_transport_tx_packet_queue_length{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}", - "instant": false, - "legendFormat": "tx queue", - "range": true - }, - { - "datasource": "${datasource}", - "expr": 
"cluster_transport_rx_packet_queue_length{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}", - "instant": false, - "legendFormat": "rx queue", - "range": true - } - ], - "title": "Pending packet queue", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "axisCenteredZero": true - }, - "unit": "Bps" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 26 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(cluster_transport_stream_rx_bytes_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", - "instant": false, - "legendFormat": "rx", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "-1 * rate(cluster_transport_stream_tx_bytes_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", - "instant": false, - "legendFormat": "tx", - "range": true - } - ], - "title": "Stream bandwidth", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "unit": "percentunit" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 26 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "1 - (\n rate(cluster_transport_stream_tx_packets_failed_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) /\n rate(cluster_transport_stream_tx_packets_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n )\n", - "instant": false, - "legendFormat": "Tx success %", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "1 - (\n rate(cluster_transport_stream_rx_packets_failed_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) /\n rate(cluster_transport_stream_rx_packets_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n )\n", 
- "instant": false, - "legendFormat": "Rx success %", - "range": true - } - ], - "title": "Stream write success rate", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "The number of open connections from this node to its peers.\n\nEach node picks up a subset of its peers to continuously gossip messages around cluster status using streaming HTTP/2 connections. This panel can be used to detect networking failures that result in cluster communication being disrupted and convergence taking longer than expected or outright failing.\n", - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 26 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "cluster_transport_streams{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}", - "instant": false, - "legendFormat": "Open streams", - "range": true - } - ], - "title": "Open transport streams", - "type": "timeseries" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": "cluster", - "query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": "namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "instance", - "name": "instance", - "query": { - "query": 
"label_values(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n", - "refId": "instance" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - "timezone": "utc", - "title": "Grafana Agent Flow / Cluster Node", - "uid": "dd370cd333b2d9258435fb1b5a20a89b" - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin - labels: - grafana_dashboard: "1" - name: agent-cluster-node.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - agent-cluster-overview.json: |- - { - "annotations": { - "list": [ - { - "datasource": "$loki_datasource", - "enable": true, - "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", - "iconColor": "rgba(0, 211, 255, 1)", - "instant": false, - "name": "Deployments", - "titleFormat": "{{cluster}}/{{namespace}}" - } - ] - }, - "graphTooltip": 1, - "links": [ - { - "icon": "doc", - "targetBlank": true, - "title": "Documentation", - "tooltip": "Clustering documentation", - "type": "link", - "url": "https://grafana.com/docs/agent/latest/flow/reference/cli/run/#clustered-mode-experimental" - }, - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "${datasource}", - "gridPos": { - "h": 9, - "w": 8, - "x": 0, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "count(cluster_node_info{cluster=\"$cluster\", 
namespace=\"$namespace\"})", - "instant": true, - "legendFormat": "__auto", - "range": false - } - ], - "title": "Nodes", - "type": "stat" - }, - { - "datasource": "${datasource}", - "description": "Nodes info.\n", - "fieldConfig": { - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Dashboard" - }, - "properties": [ - { - "id": "mappings", - "value": [ - { - "options": { - "1": { - "index": 0, - "text": "Link" - } - }, - "type": "value" - } - ] - }, - { - "id": "links", - "value": [ - { - "targetBlank": false, - "title": "Detail dashboard for node", - "url": "/d/dd370cd333b2d9258435fb1b5a20a89b/grafana-agent-flow-cluster-node?var-instance=${__data.fields.instance}&var-datasource=${datasource}&var-loki_datasource=${loki_datasource}&var-cluster=${cluster}&var-namespace=${namespace}" - } - ] - } - ] - } - ] - }, - "gridPos": { - "h": 9, - "w": 16, - "x": 8, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"}", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false - } - ], - "title": "Node table", - "transformations": [ - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true, - "Value": false, - "__name__": true, - "cluster": true, - "namespace": true, - "state": false - }, - "indexByName": { }, - "renameByName": { - "Value": "Dashboard", - "instance": "", - "state": "" - } - } - } - ], - "type": "table" - }, - { - "datasource": "${datasource}", - "description": "Whether the cluster state has converged.\n\nIt is normal for the cluster state to be diverged briefly as gossip events propagate. 
It is not normal for the cluster state to be diverged for a long period of time.\n\nThis will show one of the following:\n\n* Converged: Nodes are aware of all other nodes, with the correct states.\n* Not converged: A subset of nodes aren't aware of their peers, or don't have an updated view of peer states.\n", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "1": { - "color": "red", - "index": 1, - "text": "Not converged" - } - }, - "type": "value" - }, - { - "options": { - "match": "null", - "result": { - "color": "green", - "index": 0, - "text": "Converged" - } - }, - "type": "special" - } - ], - "unit": "suffix:nodes" - } - }, - "gridPos": { - "h": 9, - "w": 8, - "x": 0, - "y": 9 - }, - "options": { - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "clamp((\n sum(stddev by (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"}) != 0) or\n (sum(abs(sum without (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"})) - scalar(count(cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"})) != 0))\n ),\n 1, 1\n)\n", - "format": "time_series", - "instant": true, - "legendFormat": "__auto", - "range": false - } - ], - "title": "Convergance state", - "type": "stat" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 80, - "spanNulls": true - }, - "mappings": [ - { - "options": { - "0": { - "color": "green", - "text": "Yes" - } - }, - "type": "value" - }, - { - "options": { - "1": { - "color": "red", - "text": "No" - } - }, - "type": "value" - } - ], - "max": 1, - "noValue": 0 - } - }, - "gridPos": { - "h": 9, - "w": 16, - "x": 8, - "y": 9 - }, - "options": { - "mergeValues": true - }, - "targets": [ - { - 
"datasource": "${datasource}", - "expr": "ceil(clamp((\n sum(stddev by (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"})) or\n (sum(abs(sum without (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"})) - scalar(count(cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"}))))\n ),\n 0, 1\n))\n", - "instant": false, - "legendFormat": "Converged", - "range": true - } - ], - "title": "Convergance state timeline", - "type": "state-timeline" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": "cluster", - "query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": "namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - "timezone": "utc", - "title": "Grafana Agent Flow / Cluster Overview", - "uid": "7e07f9c975fcfc2a6e120a95f579f843" - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin - labels: - 
grafana_dashboard: "1" - name: agent-cluster-overview.json - namespace: monitoring-system ---- -apiVersion: v1 data: config.river: "/*\nThe following example shows using the default all logs processing module, for\na single tenant and specifying the destination url/credentials via environment\nvariables.\n*/\nlogging {\n\tlevel = coalesce(env(\"AGENT_LOG_LEVEL\"), - \"info\")\n\tformat = \"logfmt\"\n}\n\nmodule.file \"lgtmp\" {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), - \"/etc/agent-modules\") + \"/lgtmp.river\"\n\n\targuments {\n\t\tcluster = - coalesce(env(\"CLUSTER\"), \"k3d-k3s-codelab\")\n\t\tlogs_endpoint = coalesce(env(\"LOGS_ENDPOINT\"), + \"info\")\n\tformat = \"logfmt\"\n}\n\n/********************************************\n + * Grafana LGTMP Stack Receiver Provider\n ********************************************/\nmodule.file + \"lgtmp\" {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), \"/etc/agent-modules\") + + \"/lgtmp.river\"\n\n\targuments {\n\t\tcluster = coalesce(env(\"CLUSTER\"), + \"k3d-k3s-codelab\")\n\t\tlogs_endpoint = coalesce(env(\"LOGS_ENDPOINT\"), \"http://nginx.gateway.svc:3100\")\n\t\tmetrics_endpoint = coalesce(env(\"METRICS_ENDPOINT\"), \"http://nginx.gateway.svc:8080\")\n\t\tprofiles_endpoint = coalesce(env(\"PROFILES_ENDPOINT\"), \"http://nginx.gateway.svc:4040\")\n\t\ttraces_endpoint = coalesce(env(\"TRACES_ENDPOINT\"), @@ -955,8 +238,7 @@ data: {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), \"/etc/agent-modules\") + \"/traces.river\"\n\n\targuments {\n\t\tmetrics_forward_to = [module.file.lgtmp.exports.metrics_receiver]\n\t\tlogs_forward_to \ = [module.file.lgtmp.exports.logs_receiver]\n\t\ttraces_forward_to = [module.file.lgtmp.exports.traces_receiver]\n\t\tcluster - \ = coalesce(env(\"CLUSTER\"), \"k3d-k3s-codelab\")\n\t}\n}\n\ntracing - {\n\tsampling_fraction = 0.8\n\twrite_to = [module.file.traces_primary.exports.agent_traces_input]\n}\n\n/********************************************\n + \ = 
coalesce(env(\"CLUSTER\"), \"k3d-k3s-codelab\")\n\t}\n}\n\n/********************************************\n * Metrics\n ********************************************/\nmodule.file \"metrics_primary\" {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), \"/etc/agent-modules\") + \"/metrics.river\"\n\n\targuments {\n\t\tforward_to = [module.file.lgtmp.exports.metrics_receiver]\n\t\tclustering @@ -967,9365 +249,485 @@ data: \ = \"monitoring-system\"\n\t\tforward_to = [module.file.lgtmp.exports.metrics_receiver]\n\t}\n}\n" kind: ConfigMap metadata: - name: agent-config + name: agent-config-h9mgdthkmd namespace: monitoring-system --- apiVersion: v1 data: - agent-flow-controller.json: |- - { - "annotations": { - "list": [ - { - "datasource": "$loki_datasource", - "enable": true, - "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", - "iconColor": "rgba(0, 211, 255, 1)", - "instant": false, - "name": "Deployments", - "titleFormat": "{{cluster}}/{{namespace}}" - } - ] - }, - "graphTooltip": 1, - "links": [ - { - "icon": "doc", - "targetBlank": true, - "title": "Documentation", - "tooltip": "Component controller documentation", - "type": "link", - "url": "https://grafana.com/docs/agent/latest/flow/concepts/component_controller/" - }, - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "${datasource}", - "description": "The number of Grafana Agent Flow instances whose metrics are being sent and reported.\n", - "fieldConfig": { - "defaults": { - "unit": "agents" - } - }, - "gridPos": { - "h": 4, - "w": 10, - "x": 0, - "y": 0 - }, - "options": { - "colorMode": "none", - "graphMode": "none" - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": 
"count(agent_component_controller_evaluating{cluster=\"$cluster\", namespace=\"$namespace\"})", - "instant": false, - "legendFormat": "__auto", - "range": true - } - ], - "title": "Running agents", - "type": "stat" - }, - { - "datasource": "${datasource}", - "description": "The number of running components across all running agents.\n", - "fieldConfig": { - "defaults": { - "unit": "components" - } - }, - "gridPos": { - "h": 4, - "w": 10, - "x": 0, - "y": 4 - }, - "options": { - "colorMode": "none", - "graphMode": "none" - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"})", - "instant": false, - "legendFormat": "__auto", - "range": true - } - ], - "title": "Running components", - "type": "stat" - }, - { - "datasource": "${datasource}", - "description": "The percentage of components which are in a healthy state.\n", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "No components", - "unit": "percentunit" - } - }, - "gridPos": { - "h": 4, - "w": 10, - "x": 0, - "y": 8 - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "text": { - "valueSize": 80 - } - }, - "pluginVersion": "9.0.6", - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\",health_type=\"healthy\"}) /\nsum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"})\n", - "instant": false, - "legendFormat": "__auto", - "range": true - } - ], - "title": "Overall component health", - "type": "stat" - }, - { - "datasource": "${datasource}", - "description": "Breakdown of components by health across all running agents.\n\n* Healthy: components have been evaluated completely and are reporting themselves as healthy.\n* Unhealthy: Components either could not be evaluated or are reporting themselves as unhealthy.\n* Unknown: 
A component has been created but has not yet been started.\n* Exited: A component has exited. It will not return to the running state.\n\nMore information on a component's health state can be retrieved using\nthe Grafana Agent Flow UI.\n\nNote that components may be in a degraded state even if they report\nthemselves as healthy. Use component-specific dashboards and alerts\nto observe detailed information about the behavior of a component.\n", - "fieldConfig": { - "defaults": { - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Unhealthy" - }, - "properties": [ - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 1 - } - ] - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Unknown" - }, - "properties": [ - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "blue", - "value": 1 - } - ] - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Exited" - }, - "properties": [ - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 1 - } - ] - } - } - ] - } - ] - }, - "gridPos": { - "h": 12, - "w": 14, - "x": 10, - "y": 0 - }, - "options": { - "orientation": "vertical", - "showUnfilled": true - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"healthy\"}) or vector(0)", - "instant": true, - "legendFormat": "Healthy", - "range": false - }, - { - "datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", 
health_type=\"unhealthy\"}) or vector(0)", - "instant": true, - "legendFormat": "Unhealthy", - "range": false - }, - { - "datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"unknown\"}) or vector(0)", - "instant": true, - "legendFormat": "Unknown", - "range": false - }, - { - "datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"exited\"}) or vector(0)", - "instant": true, - "legendFormat": "Exited", - "range": false - } - ], - "title": "Components by health", - "type": "bargauge" - }, - { - "datasource": "${datasource}", - "description": "The frequency at which components get updated.\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "points", - "pointSize": 3 - }, - "unit": "ops" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 12 - }, - "options": { - "tooltip": { - "mode": "multi" - } - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (instance) (rate(agent_component_evaluation_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", - "instant": false, - "legendFormat": "__auto", - "range": true - } - ], - "title": "Component evaluation rate", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "The percentiles for how long it takes to complete component evaluations.\n\nComponent evaluations must complete for components to have the latest\narguments. 
The longer the evaluations take, the slower it will be to\nreconcile the state of components.\n\nIf evaluation is taking too long, consider sharding your components to\ndeal with smaller amounts of data and reuse data as much as possible.\n", - "fieldConfig": { - "defaults": { - "unit": "s" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "histogram_quantile(0.99, sum(rate(agent_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\nor\nhistogram_quantile(0.99, sum by (le) (rate(agent_component_evaluation_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\n", - "instant": false, - "legendFormat": "99th percentile", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "histogram_quantile(0.50, sum(rate(agent_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\nor\nhistogram_quantile(0.50, sum by (le) (rate(agent_component_evaluation_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\n", - "instant": false, - "legendFormat": "50th percentile", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "(\n histogram_sum(sum(rate(agent_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))) /\n histogram_count(sum(rate(agent_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\n)\nor\n(\n sum(rate(agent_component_evaluation_seconds_sum{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])) /\n sum(rate(agent_component_evaluation_seconds_count{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n)\n", - "instant": false, - "legendFormat": "Average", - "range": true - } - ], - "title": "Component evaluation time", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "The 
percentage of time spent evaluating 'slow' components - components that took longer than 1 minute to evaluate.\n\nIdeally, no component should take more than 1 minute to evaluate. The components displayed in this chart\nmay be a sign of a problem with the pipeline.\n", - "fieldConfig": { - "defaults": { - "unit": "percentunit" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (component_id) (rate(agent_component_evaluation_slow_seconds{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n/ scalar(sum(rate(agent_component_evaluation_seconds_sum{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))\n", - "instant": false, - "legendFormat": "{{component_id}}", - "range": true - } - ], - "title": "Slow components evaluation times", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Detailed histogram view of how long component evaluations take.\n\nThe goal is to design your config so that evaluations take as little\ntime as possible; under 100ms is a good goal.\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 22 - }, - "maxDataPoints": 30, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "scheme": "Spectral" - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 0.10000000000000001 - }, - "tooltip": { - "show": true, - "yHistogram": true - }, - "yAxis": { - "unit": "s" - } - }, - "pluginVersion": "9.0.6", - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(increase(agent_component_evaluation_seconds{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\nor ignoring (le)\nsum by (le) (increase(agent_component_evaluation_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n", - "format": "heatmap", - "instant": false, - "legendFormat": "{{le}}", - "range": true - } - ], - "title": "Component evaluation 
histogram", - "type": "heatmap" - }, - { - "datasource": "${datasource}", - "description": "Detailed histogram of how long components wait to be evaluated after their dependency is updated.\n\nThe goal is to design your config so that most of the time components do not\nqueue for long; under 10ms is a good goal.\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 22 - }, - "maxDataPoints": 30, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "scheme": "Spectral" - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 0.10000000000000001 - }, - "tooltip": { - "show": true, - "yHistogram": true - }, - "yAxis": { - "unit": "s" - } - }, - "pluginVersion": "9.0.6", - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(increase(agent_component_dependencies_wait_seconds{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\nor ignoring (le)\nsum by (le) (increase(agent_component_dependencies_wait_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n", - "format": "heatmap", - "instant": false, - "legendFormat": "{{le}}", - "range": true - } - ], - "title": "Component dependency wait histogram", - "type": "heatmap" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": "cluster", - "query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - 
"query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": "namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - "timezone": "utc", - "title": "Grafana Agent Flow / Controller", - "uid": "f861e5fef2e795edd5c4c73bee1ba769" - } + MEMCACHED_SECRET_NAME: integrations-memcached + MYSQL_SECRET_NAME: integrations-mysql + REDIS_SECRET_NAME: integrations-redis + memcached.river: "/*\nModule: Memcached integrations\nDescription: Wrapper module + to integration Memcached metrics\n*/\nargument \"clustering\" {\n\t// comment + = \"Whether or not clustering should be enabled\"\n\toptional = true\n\tdefault + \ = true\n}\n\nargument \"name\" {\n\t// comment = \"Name of the secret for Memcached\"\n\toptional + = true\n\tdefault = \"integrations-memcached\"\n}\n\nargument \"namespace\" {\n\t// + comment = \"Namespace of the Memcached secret Integrations\"\n\toptional = true\n\tdefault + \ = \"default\"\n}\n\nargument \"instance\" {\n\t// comment = \"Instance of the + Memcached\"\n\toptional = true\n\tdefault = \"primary\"\n}\n\nargument \"forward_to\" + { }\n\nremote.kubernetes.secret \"memcached\" {\n\tname = argument.name.value\n\tnamespace + = argument.namespace.value\n}\n\n// Metrics\nprometheus.exporter.memcached \"integrations_memcached\" + {\n\taddress = nonsensitive(remote.kubernetes.secret.memcached.data[\"memcached-address\"])\n\ttimeout + = \"5s\"\n}\n\nprometheus.scrape \"memcached\" {\n\tclustering {\n\t\tenabled + = argument.clustering.value\n\t}\n\n\tenable_protobuf_negotiation = true\n\tscrape_classic_histograms + \ = true\n\n\ttargets = 
concat(\n\t\tprometheus.exporter.memcached.integrations_memcached.targets,\n\t)\n\tjob_name + \ = \"integrations/memcached\"\n\tforward_to = [prometheus.relabel.integrations_memcached.receiver]\n}\n\nprometheus.relabel + \"integrations_memcached\" {\n\trule {\n\t\treplacement = argument.instance.value\n\t\ttarget_label + = \"instance\"\n\t}\n\tforward_to = argument.forward_to.value\n}\n" + mysql.river: "/*\nModule: Mysql integrations\nDescription: Wrapper module to integration + mysql metrics\n*/\nargument \"clustering\" {\n\t// comment = \"Whether or not + clustering should be enabled\"\n\toptional = true\n\tdefault = true\n}\n\nargument + \"name\" {\n\t// comment = \"Name of the secret for MySQL\"\n\toptional = true\n\tdefault + \ = \"integrations-mysql\"\n}\n\nargument \"namespace\" {\n\t// comment = \"Namespace + of the MySQL secret Integrations\"\n\toptional = true\n\tdefault = \"default\"\n}\n\nargument + \"instance\" {\n\t// comment = \"Instance of the Database\"\n\toptional = true\n\tdefault + \ = \"primary\"\n}\n\nargument \"forward_to\" { }\n\nremote.kubernetes.secret + \"mysql\" {\n\tname = argument.name.value\n\tnamespace = argument.namespace.value\n}\n\n// + Metrics\nprometheus.exporter.mysql \"integrations_mysql\" {\n\tdata_source_name + = nonsensitive(remote.kubernetes.secret.mysql.data[\"mysql-username\"]) + \":\" + + nonsensitive(remote.kubernetes.secret.mysql.data[\"mysql-password\"]) + \"@(\" + + nonsensitive(remote.kubernetes.secret.mysql.data[\"mysql-host\"]) + \")/\"\n}\n\nprometheus.scrape + \"mysql\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\tenable_protobuf_negotiation + = true\n\tscrape_classic_histograms = true\n\n\ttargets = concat(\n\t\tprometheus.exporter.mysql.integrations_mysql.targets,\n\t)\n\tjob_name + \ = \"integrations/mysql\"\n\tforward_to = [prometheus.relabel.integrations_mysql.receiver]\n}\n\nprometheus.relabel + \"integrations_mysql\" {\n\trule {\n\t\treplacement = 
argument.instance.value\n\t\ttarget_label + = \"instance\"\n\t}\n\tforward_to = argument.forward_to.value\n}\n" + redis.river: "/*\nModule: Redis integrations\nDescription: Wrapper module to integration + Redis metrics\n*/\nargument \"clustering\" {\n\t// comment = \"Whether or not + clustering should be enabled\"\n\toptional = true\n\tdefault = true\n}\n\nargument + \"namespace\" {\n\t// comment = \"Namespace of the Redis secret Integrations\"\n\toptional + = true\n\tdefault = \"default\"\n}\n\nargument \"name\" {\n\t// comment = \"Name + of the secret for Redis\"\n\toptional = true\n\tdefault = \"integrations-redis\"\n}\n\nargument + \"instance\" {\n\t// comment = \"Instance of the Redis\"\n\toptional = true\n\tdefault + \ = \"master\"\n}\n\nargument \"forward_to\" { }\n\nremote.kubernetes.secret \"redis\" + {\n\tname = argument.name.value\n\tnamespace = argument.namespace.value\n}\n\n// + Metrics\nprometheus.exporter.redis \"integrations_redis\" {\n\tredis_addr = + nonsensitive(remote.kubernetes.secret.redis.data[\"redis-addr\"])\n\tredis_password + = nonsensitive(remote.kubernetes.secret.redis.data[\"redis-password\"])\n}\n\nprometheus.scrape + \"redis\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\tenable_protobuf_negotiation + = true\n\tscrape_classic_histograms = true\n\n\ttargets = concat(\n\t\tprometheus.exporter.redis.integrations_redis.targets,\n\t)\n\tjob_name + \ = \"integrations/redis\"\n\tforward_to = [prometheus.relabel.integrations_redis.receiver]\n}\n\nprometheus.relabel + \"integrations_redis\" {\n\trule {\n\t\treplacement = argument.instance.value\n\t\ttarget_label + = \"instance\"\n\t}\n\tforward_to = argument.forward_to.value\n}\n" kind: ConfigMap metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin - labels: - grafana_dashboard: "1" - name: agent-flow-controller.json + name: agent-integrations namespace: monitoring-system --- apiVersion: v1 data: - agent-flow-opentelemetry.json: |- - { - 
"graphTooltip": 1, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "title": "Receivers for traces [otelcol.receiver]", - "type": "row" - }, - { - "datasource": "${datasource}", - "description": "Number of spans successfully pushed into the pipeline.\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(receiver_accepted_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{ pod }} / {{ transport }}", - "range": true - } - ], - "title": "Accepted spans", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Number of spans that could not be pushed into the pipeline.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 20, - "gradientMode": "hue", - "stacking": { - "mode": "normal" - } - } - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(receiver_refused_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{ pod }} / {{ transport }}", - "range": true - } - ], - "title": "Refused spans", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "The duration of inbound RPCs.\n", - "fieldConfig": { - "defaults": { - "unit": "milliseconds" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 0 - }, - "maxDataPoints": 30, - "options": { - "calculate": false, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - 
"scale": "exponential", - "scheme": "Oranges", - "steps": 65 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1.0000000000000001e-09 - }, - "tooltip": { - "show": true, - "yHistogram": true - }, - "yAxis": { - "unit": "s" - } - }, - "pluginVersion": "9.0.6", - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (le) (increase(rpc_server_duration_milliseconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", rpc_service=\"opentelemetry.proto.collector.trace.v1.TraceService\"}[$__rate_interval]))", - "format": "heatmap", - "instant": false, - "legendFormat": "{{le}}", - "range": true - } - ], - "title": "RPC server duration (traces)", - "type": "heatmap" - }, - { - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 10 - }, - "title": "Batching [otelcol.processor.batch]", - "type": "row" - }, - { - "datasource": "${datasource}", - "description": "Number of units in the batch\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 10 - }, - "maxDataPoints": 30, - "options": { - "calculate": false, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "scale": "exponential", - "scheme": "Oranges", - "steps": 65 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1.0000000000000001e-09 - }, - "tooltip": { - "show": true, - "yHistogram": true - }, - "yAxis": { - "unit": "s" - } - }, - "pluginVersion": "9.0.6", - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (le) (increase(processor_batch_batch_send_size_ratio_bucket{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval]))", - "format": "heatmap", - "instant": false, - "legendFormat": "{{le}}", - "range": true - } - ], - "title": "Number of units in the batch", - "type": "heatmap" - }, - { - "datasource": "${datasource}", - "description": "Number of distinct metadata value combinations 
being processed\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 10 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "processor_batch_metadata_cardinality_ratio{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}\n", - "instant": false, - "legendFormat": "{{ pod }}", - "range": true - } - ], - "title": "Distinct metadata values", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Number of times the batch was sent due to a timeout trigger\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 10 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(processor_batch_timeout_trigger_send_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{ pod }}", - "range": true - } - ], - "title": "Timeout trigger", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 20 - }, - "title": "Exporters for traces [otelcol.exporter]", - "type": "row" - }, - { - "datasource": "${datasource}", - "description": "Number of spans successfully sent to destination.\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 20 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(exporter_sent_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{ pod }}", - "range": true - } - ], - "title": "Exported sent spans", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Number of spans in failed attempts to send to destination.\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 20 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(exporter_send_failed_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", - 
"instant": false, - "legendFormat": "{{ pod }}", - "range": true - } - ], - "title": "Exported failed spans", - "type": "timeseries" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": "cluster", - "query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": "namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "allValue": ".*", - "datasource": "${datasource}", - "includeAll": true, - "label": "instance", - "multi": true, - "name": "instance", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n", - "refId": "instance" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - "timezone": "utc", - "title": "Grafana Agent Flow / OpenTelemetry", - "uid": "c90e752eb8c0fce588f906b7279aceea" - } + integrations.river: "/*\nModule: Agent integrations\nDescription: Wrapper module + to include auto loading 
integrations\n*/\nargument \"name\" {\n\t// comment = + \"Name of the integrations config\"\n\toptional = true\n\tdefault = \"agent-integrations\"\n}\n\nargument + \"namespace\" {\n\t// comment = \"Namespace of the integrations config\"\n\toptional + = true\n\tdefault = \"default\"\n}\n\nargument \"forward_to\" { }\n\nremote.kubernetes.configmap + \"integrations\" {\n\tname = argument.name.value\n\tnamespace = argument.namespace.value\n}\n\n/********************************************\n + * Integrations Mysql\n ********************************************/\nmodule.string + \"mysql\" {\n\tcontent = remote.kubernetes.configmap.integrations.data[\"mysql.river\"]\n\n\targuments + {\n\t\tnamespace = argument.namespace.value\n\t\tname = remote.kubernetes.configmap.integrations.data[\"MYSQL_SECRET_NAME\"]\n\t\tinstance + \ = \"primary\"\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n\n/********************************************\n + * Integrations Memcached\n ********************************************/\nmodule.string + \"memcached\" {\n\tcontent = remote.kubernetes.configmap.integrations.data[\"memcached.river\"]\n\n\targuments + {\n\t\tnamespace = argument.namespace.value\n\t\tname = remote.kubernetes.configmap.integrations.data[\"MEMCACHED_SECRET_NAME\"]\n\t\tinstance + \ = \"primary\"\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n\n/********************************************\n + * Integrations Redis\n ********************************************/\nmodule.string + \"redis\" {\n\tcontent = remote.kubernetes.configmap.integrations.data[\"redis.river\"]\n\n\targuments + {\n\t\tnamespace = argument.namespace.value\n\t\tname = remote.kubernetes.configmap.integrations.data[\"REDIS_SECRET_NAME\"]\n\t\tinstance + \ = \"master\"\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n" + lgtmp.river: "/********************************************\n * ARGUMENTS\n ********************************************/\nargument + \"cluster\" {\n\toptional = true\n\tdefault 
= \"k3d-k3s-codelab\"\n}\n\nargument + \"tenant\" {\n\toptional = true\n\tdefault = \"anonymous\"\n}\n\nargument \"metrics_endpoint\" + {\n\toptional = true\n\tdefault = \"http://mimir:8080\"\n\t//comment = \"Where + to send collected metrics.\"\n}\n\nargument \"logs_endpoint\" {\n\toptional = + true\n\tdefault = \"http://loki:3100\"\n\t//comment = \"Where to send collected + logs.\"\n}\n\nargument \"traces_endpoint\" {\n\toptional = true\n\tdefault = + \"tempo:4317\"\n\t//comment = \"Where to send collected traces.\"\n}\n\nargument + \"profiles_endpoint\" {\n\toptional = true\n\tdefault = \"http://pyroscope:4040\"\n\t//comment + \ = \"Where to send collected profiles.\"\n}\n\n/********************************************\n + * EXPORTS\n ********************************************/\n\nexport \"metrics_receiver\" + {\n\tvalue = prometheus.remote_write.mimir.receiver\n}\n\nexport \"logs_receiver\" + {\n\tvalue = loki.write.loki.receiver\n}\n\nexport \"traces_receiver\" {\n\tvalue + = otelcol.exporter.otlp.tempo.input\n}\n\nexport \"profiles_receiver\" {\n\tvalue + = pyroscope.write.pyroscope.receiver\n}\n\n/********************************************\n + * Endpoints\n ********************************************/\n\n// Metrics\nprometheus.remote_write + \"mimir\" {\n\tendpoint {\n\t\turl = argument.metrics_endpoint.value + + \"/api/v1/push\"\n\t\tsend_native_histograms = true\n\t}\n\n\texternal_labels + = {\n\t\t\"scraped_by\" = \"grafana-agent\",\n\t\t\"cluster\" = argument.cluster.value,\n\t}\n}\n\n// + Logs\nloki.write \"loki\" {\n\tendpoint {\n\t\turl = argument.logs_endpoint.value + + \"/loki/api/v1/push\"\n\t\ttenant_id = argument.tenant.value\n\t}\n\n\texternal_labels + = {\n\t\t\"scraped_by\" = \"grafana-agent\",\n\t\t\"cluster\" = argument.cluster.value,\n\t}\n}\n\n// + Traces\notelcol.exporter.otlp \"tempo\" {\n\tclient {\n\t\tendpoint = argument.traces_endpoint.value\n\n\t\ttls + {\n\t\t\tinsecure = true\n\t\t\tinsecure_skip_verify = 
true\n\t\t}\n\t}\n}\n\n// + Profiles\npyroscope.write \"pyroscope\" {\n\tendpoint {\n\t\turl = argument.profiles_endpoint.value\n\t}\n\n\texternal_labels + = {\n\t\t\"scraped_by\" = \"grafana-agent\",\n\t\t\"cluster\" = argument.cluster.value,\n\t}\n}\n" + logs.river: "/*\nModule: logs\nDescription: Wrapper module to include all kubernetes + logging modules and use cri parsing\n*/\nargument \"forward_to\" {\n\t// comment + = \"Must be a list(LogsReceiver) where collected logs should be forwarded to\"\n\toptional + = false\n}\n\nargument \"tenant\" {\n\t// comment = \"The tenant to filter logs + to. This does not have to be the tenantId, this is the value to look for in the + logs.agent.grafana.com/tenant annotation, and this can be a regex.\"\n\toptional + = true\n\tdefault = \".*\"\n}\n\nargument \"keep_labels\" {\n\t// comment = \"List + of labels to keep before the log message is written to Loki\"\n\toptional = true\n\tdefault + \ = [\n\t\t\"app\",\n\t\t\"cluster\",\n\t\t\"component\",\n\t\t\"container\",\n\t\t\"deployment\",\n\t\t\"env\",\n\t\t\"filename\",\n\t\t\"instance\",\n\t\t\"job\",\n\t\t\"level\",\n\t\t\"log_type\",\n\t\t\"namespace\",\n\t\t\"region\",\n\t\t\"service\",\n\t\t\"squad\",\n\t\t\"team\",\n\t]\n}\n\nargument + \"git_repo\" {\n\toptional = true\n\tdefault = coalesce(env(\"GIT_REPO\"), \"https://github.com/grafana/agent-modules.git\")\n}\n\nargument + \"git_rev\" {\n\toptional = true\n\tdefault = coalesce(env(\"GIT_REV\"), env(\"GIT_REVISION\"), + env(\"GIT_BRANCH\"), \"main\")\n}\n\nargument \"git_pull_freq\" {\n\t// comment + = \"How often to pull the git repo, the default is 0s which means never pull\"\n\toptional + = true\n\tdefault = \"0s\"\n}\n\nmodule.git \"log_targets\" {\n\trepository = + argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/targets/logs-from-worker.river\"\n\n\targuments + {\n\t\tforward_to = 
[module.git.log_formats_all.exports.process.receiver]\n\t\ttenant + \ = argument.tenant.value\n\t\tgit_repo = argument.git_repo.value\n\t\tgit_rev + \ = argument.git_rev.value\n\t\tgit_pull_freq = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git + \"log_formats_all\" {\n\trepository = argument.git_repo.value\n\trevision + \ = argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/log-formats/all.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.log_level_default.exports.process.receiver]\n\t\tgit_repo + \ = argument.git_repo.value\n\t\tgit_rev = argument.git_rev.value\n\t\tgit_pull_freq + = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git \"log_level_default\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/labels/log-level.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.label_normalize_filename.exports.process.receiver]\n\t}\n}\n\nmodule.git + \"label_normalize_filename\" {\n\trepository = argument.git_repo.value\n\trevision + \ = argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/labels/normalize-filename.river\"\n\n\targuments + {\n\t\t// here we fork, one branch goes to the log level module, the other goes + to the metrics module\n\t\t// this is because we need to reduce the labels on + the pre-metrics but they are still necessary in\n\t\t// downstream modules\n\t\tforward_to + = [\n\t\t\tmodule.git.pre_process_metrics.exports.process.receiver,\n\t\t\tmodule.git.drop_levels.exports.process.receiver,\n\t\t]\n\t}\n}\n\nmodule.git + \"pre_process_metrics\" {\n\trepository = argument.git_repo.value\n\trevision + \ = argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/metrics/pre-process-bytes-lines.river\"\n\n\targuments + {\n\t\tforward_to = 
[module.git.drop_levels.exports.process.receiver]\n\t\tkeep_labels + = argument.keep_labels.value\n\t}\n}\n\nmodule.git \"drop_levels\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/drops/levels.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.scrub_all.exports.process.receiver]\n\t\tgit_repo + \ = argument.git_repo.value\n\t\tgit_rev = argument.git_rev.value\n\t\tgit_pull_freq + = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git \"scrub_all\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/scrubs/all.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.embed_pod.exports.process.receiver]\n\t\tgit_repo + \ = argument.git_repo.value\n\t\tgit_rev = argument.git_rev.value\n\t\tgit_pull_freq + = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git \"embed_pod\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/embed/pod.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.mask_all.exports.process.receiver]\n\t}\n}\n\nmodule.git + \"mask_all\" {\n\trepository = argument.git_repo.value\n\trevision = + argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/masks/all.river\"\n\n\targuments {\n\t\tforward_to + \ = [module.git.label_keep.exports.process.receiver]\n\t\tgit_repo = argument.git_repo.value\n\t\tgit_rev + \ = argument.git_rev.value\n\t\tgit_pull_freq = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git + \"label_keep\" {\n\trepository = argument.git_repo.value\n\trevision = + argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/labels/keep-labels.river\"\n\n\targuments + 
{\n\t\tforward_to = [module.git.post_process_metrics.exports.process.receiver]\n\t\tkeep_labels + = argument.keep_labels.value\n\t}\n}\n\nmodule.git \"post_process_metrics\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/metrics/post-process-bytes-lines.river\"\n\n\targuments + {\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n" + metrics.river: "/*\nModule: metrics-all\nDescription: Wrapper module to include + all kubernetes metric modules and use cri parsing\n*/\nargument \"forward_to\" + {\n\t// comment = \"Must be a list(MetricssReceiver) where collected logs should + be forwarded to\"\n\toptional = false\n}\n\nargument \"clustering\" {\n\t// comment + = \"Whether or not clustering should be enabled\"\n\toptional = true\n\tdefault + \ = false\n}\n\n/********************************************\n * Kubernetes Auto + Scrape ServiceMonitor\n ********************************************/\nprometheus.operator.servicemonitors + \"auto_scrape_servicemonitors\" {\n\tforward_to = argument.forward_to.value\n\n\tclustering + {\n\t\tenabled = argument.clustering.value\n\t}\n}\n\n/********************************************\n + * Kubernetes Auto Scrape PodMonitors\n ********************************************/\nprometheus.operator.podmonitors + \"auto_scrape_podmonitors\" {\n\tforward_to = argument.forward_to.value\n\n\tclustering + {\n\t\tenabled = argument.clustering.value\n\t}\n\n\tselector {\n\t\tmatch_expression + {\n\t\t\tkey = \"team\"\n\t\t\toperator = \"In\"\n\t\t\tvalues = [\"team-infra\"]\n\t\t}\n\t}\n}\n\n/********************************************\n + * Kubernetes Prometheus Rules To Mimir\n ********************************************/\nmimir.rules.kubernetes + \"prometheus_rules_to_mimir\" {\n\taddress = coalesce(env(\"METRICS_ENDPOINT\"), + \"http://nginx.gateway.svc:8080\")\n\ttenant_id = \"anonymous\"\n}\n" + profiles.river: 
"/*\nModule: profiles\nDescription: Wrapper module to include all + kubernetes profile modules and use cri parsing\n*/\nargument \"forward_to\" {\n\t// + comment = \"Must be a list(ProfilessReceiver) where collected logs should be forwarded + to\"\n\toptional = false\n}\n\nargument \"clustering\" {\n\t// comment = \"Whether + or not clustering should be enabled\"\n\toptional = true\n\tdefault = false\n}\n\ndiscovery.kubernetes + \"pyroscope_kubernetes\" {\n\trole = \"pod\"\n}\n\n// The default scrape config + allows to define annotations based scraping.\n//\n// For example the following + annotations:\n//\n// ```\n// profiles.grafana.com/memory.scrape: \"true\"\n// + profiles.grafana.com/memory.port: \"8080\"\n// profiles.grafana.com/cpu.scrape: + \"true\"\n// profiles.grafana.com/cpu.port: \"8080\"\n// profiles.grafana.com/goroutine.scrape: + \"true\"\n// profiles.grafana.com/goroutine.port: \"8080\"\n// ```\n//\n// will + scrape the `memory`, `cpu` and `goroutine` profiles from the `8080` port of the + pod.\n//\n// For more information see https://grafana.com/docs/phlare/latest/operators-guide/deploy-kubernetes/#optional-scrape-your-own-workloads-profiles\ndiscovery.relabel + \"kubernetes_pods\" {\n\ttargets = concat(discovery.kubernetes.pyroscope_kubernetes.targets)\n\n\trule + {\n\t\taction = \"drop\"\n\t\tsource_labels = [\"__meta_kubernetes_pod_phase\"]\n\t\tregex + \ = \"Pending|Succeeded|Failed|Completed\"\n\t}\n\n\trule {\n\t\taction + = \"labelmap\"\n\t\tregex = \"__meta_kubernetes_pod_label_(.+)\"\n\t}\n\n\trule + {\n\t\taction = \"replace\"\n\t\tsource_labels = [\"__meta_kubernetes_namespace\"]\n\t\ttarget_label + \ = \"namespace\"\n\t}\n\n\trule {\n\t\taction = \"replace\"\n\t\tsource_labels + = [\"__meta_kubernetes_pod_name\"]\n\t\ttarget_label = \"pod\"\n\t}\n\n\trule + {\n\t\taction = \"replace\"\n\t\tsource_labels = [\"__meta_kubernetes_pod_container_name\"]\n\t\ttarget_label + \ = \"container\"\n\t}\n}\n\ndiscovery.relabel 
\"kubernetes_pods_memory_default_name\" + {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_memory_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = 
\"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Memory\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_memory\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_memory_default_name.output, discovery.relabel.kubernetes_pods_memory_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = true\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_cpu_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = 
\"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_cpu_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = 
\"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape CPU\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_cpu\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_cpu_default_name.output, discovery.relabel.kubernetes_pods_cpu_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_goroutine_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = 
\"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_goroutine_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Goroutine\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_goroutine\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_goroutine_default_name.output, + discovery.relabel.kubernetes_pods_goroutine_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + 
= false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_block_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_block_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = 
[\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Block\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_block\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_block_default_name.output, discovery.relabel.kubernetes_pods_block_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_mutex_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = 
\"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_mutex_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = 
\"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Mutex\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_mutex\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_mutex_default_name.output, discovery.relabel.kubernetes_pods_mutex_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_fgprof_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = 
\"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_fgprof_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Fgprof\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_fgprof\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + 
\ = concat(discovery.relabel.kubernetes_pods_fgprof_default_name.output, discovery.relabel.kubernetes_pods_fgprof_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = true\n\t\t}\n\t}\n}\n" + traces.river: "/*\nModule: traces\n*/\n\n/********************************************\n + * ARGUMENTS\n ********************************************/\nargument \"traces_forward_to\" + {\n\toptional = false\n}\n\nargument \"logs_forward_to\" {\n\toptional = false\n}\n\nargument + \"metrics_forward_to\" {\n\toptional = false\n}\n\nargument \"cluster\" {\n\toptional + = true\n\tdefault = \"k3d-k3s-codelab\"\n}\n\nargument \"otlp_http_endpoint\" + {\n\toptional = true\n\tdefault = \"0.0.0.0:4318\"\n}\n\nargument \"otlp_grpc_endpoint\" + {\n\toptional = true\n\tdefault = \"0.0.0.0:4317\"\n}\n\n/********************************************\n + * EXPORTS\n ********************************************/\nexport \"agent_traces_input\" + {\n\tvalue = otelcol.processor.batch.default.input\n}\n\n/********************************************\n + * Jaeger for Metrics Logs Traces\n ********************************************/\n\notelcol.receiver.jaeger + \"default\" {\n\tprotocols {\n\t\tgrpc {\n\t\t\tendpoint = \"0.0.0.0:14250\"\n\t\t}\n\n\t\tthrift_http + {\n\t\t\tendpoint = \"0.0.0.0:14268\"\n\t\t}\n\n\t\tthrift_binary {\n\t\t\tendpoint + = \"0.0.0.0:6832\"\n\t\t}\n\n\t\tthrift_compact {\n\t\t\tendpoint = \"0.0.0.0:6831\"\n\t\t}\n\t}\n\n\toutput + {\n\t\tmetrics = [otelcol.processor.batch.default.input]\n\t\tlogs = [otelcol.processor.resourcedetection.default.input]\n\t\ttraces + \ = 
[otelcol.processor.resourcedetection.default.input]\n\t}\n}\n\n/********************************************\n + * Otelcol for Metrics Logs Traces\n ********************************************/\n// + https://grafana.com/docs/agent/latest/flow/reference/components/otelcol.receiver.otlp/\notelcol.receiver.otlp + \"default\" {\n\tgrpc {\n\t\tendpoint = argument.otlp_grpc_endpoint.value\n\t}\n\n\thttp + {\n\t\tendpoint = argument.otlp_http_endpoint.value\n\t}\n\n\toutput {\n\t\tmetrics + = [otelcol.processor.batch.default.input]\n\t\tlogs = [otelcol.processor.resourcedetection.default.input]\n\t\ttraces + \ = [\n\t\t\totelcol.processor.resourcedetection.default.input,\n\t\t\totelcol.connector.spanlogs.autologging.input,\n\t\t]\n\t}\n}\n\notelcol.processor.resourcedetection + \"default\" {\n\tdetectors = [\"env\"]\n\n\toutput {\n\t\tlogs = [otelcol.processor.k8sattributes.default.input]\n\t\ttraces + = [otelcol.processor.k8sattributes.default.input]\n\t}\n}\n\notelcol.processor.k8sattributes + \"default\" {\n\textract {\n\t\tmetadata = [\n\t\t\t\"k8s.namespace.name\",\n\t\t\t\"k8s.pod.name\",\n\t\t\t\"k8s.deployment.name\",\n\t\t\t\"k8s.statefulset.name\",\n\t\t\t\"k8s.daemonset.name\",\n\t\t\t\"k8s.cronjob.name\",\n\t\t\t\"k8s.job.name\",\n\t\t\t\"k8s.node.name\",\n\t\t\t\"k8s.pod.uid\",\n\t\t\t\"k8s.pod.start_time\",\n\t\t]\n\t}\n\n\tpod_association + {\n\t\tsource {\n\t\t\tfrom = \"connection\"\n\t\t}\n\t}\n\n\toutput {\n\t\tlogs + \ = [otelcol.processor.transform.add_resource_attributes.input]\n\t\ttraces = + [otelcol.processor.transform.add_resource_attributes.input]\n\t}\n}\n\notelcol.processor.transform + \"add_resource_attributes\" {\n\terror_mode = \"ignore\"\n\n\tlog_statements {\n\t\tcontext + \ = \"resource\"\n\t\tstatements = [\n\t\t\t`set(attributes[\"pod\"], attributes[\"k8s.pod.name\"])`,\n\t\t\t`set(attributes[\"namespace\"], + attributes[\"k8s.namespace.name\"])`,\n\t\t\t`set(attributes[\"loki.resource.labels\"], + \"pod, namespace, cluster, 
job\")`,\n\t\t\t`set(attributes[\"k8s.cluster.name\"], + \"k3d-k3s-codelab\") where attributes[\"k8s.cluster.name\"] == nil`,\n\t\t]\n\t}\n\n\ttrace_statements + {\n\t\tcontext = \"resource\"\n\t\tstatements = [\n\t\t\t`set(attributes[\"k8s.cluster.name\"], + \"k3d-k3s-codelab\") where attributes[\"k8s.cluster.name\"] == nil`,\n\t\t]\n\t}\n\n\toutput + {\n\t\tlogs = [otelcol.processor.filter.default.input]\n\t\ttraces = [otelcol.processor.filter.default.input]\n\t}\n}\n\notelcol.processor.filter + \"default\" {\n\terror_mode = \"ignore\"\n\n\toutput {\n\t\tlogs = [otelcol.processor.batch.default.input]\n\t\ttraces + = [otelcol.processor.batch.default.input]\n\t}\n}\n\notelcol.processor.batch \"default\" + {\n\tsend_batch_size = 16384\n\tsend_batch_max_size = 0\n\ttimeout = + \"5s\"\n\n\toutput {\n\t\tmetrics = [otelcol.processor.memory_limiter.default.input]\n\t\tlogs + \ = [otelcol.processor.memory_limiter.default.input]\n\t\ttraces = [otelcol.processor.memory_limiter.default.input]\n\t}\n}\n\notelcol.processor.memory_limiter + \"default\" {\n\tcheck_interval = \"1s\"\n\tlimit_percentage = 50\n\tspike_limit_percentage + = 30\n\n\toutput {\n\t\tmetrics = [otelcol.exporter.prometheus.tracesmetrics.input]\n\t\tlogs + \ = [otelcol.exporter.loki.traceslogs.input]\n\t\ttraces = argument.traces_forward_to.value\n\t}\n}\n\notelcol.exporter.prometheus + \"tracesmetrics\" {\n\tforward_to = argument.metrics_forward_to.value\n}\n\notelcol.exporter.loki + \"traceslogs\" {\n\tforward_to = [loki.process.traceslogs.receiver]\n}\n\n// The + OpenTelemetry spanlog connector processes incoming trace spans and extracts data + from them ready\n// for logging.\notelcol.connector.spanlogs \"autologging\" {\n\t// + We only want to output a line for each root span (ie. 
every single trace), and + not for every\n\t// process or span (outputting a line for every span would be + extremely verbose).\n\tspans = false\n\troots = true\n\tprocesses = false\n\n\t// + We want to ensure that the following three span attributes are included in the + log line, if present.\n\tspan_attributes = [\n\t\t\"http.method\",\n\t\t\"http.target\",\n\t\t\"http.status_code\",\n\t]\n\n\t// + Overrides the default key in the log line to be `traceId`, which is then used + by Grafana to\n\t// identify the trace ID for correlation with the Tempo datasource.\n\toverrides + {\n\t\ttrace_id_key = \"traceId\"\n\t}\n\n\t// Send to the OpenTelemetry Loki + exporter.\n\toutput {\n\t\tlogs = [otelcol.exporter.loki.autologging.input]\n\t}\n}\n\n// + Simply forwards the incoming OpenTelemetry log format out as a Loki log.\n// We + need this stage to ensure we can then process the logline as a Loki object.\notelcol.exporter.loki + \"autologging\" {\n\tforward_to = [loki.process.autologging.receiver]\n}\n\n// + The Loki processor allows us to accept a correctly formatted Loki log and mutate + it into\n// a set of fields for output.\nloki.process \"autologging\" {\n\t// + The JSON stage simply extracts the `body` (the actual logline) from the Loki log, + ignoring\n\t// all other fields.\n\tstage.json {\n\t\texpressions = {\"body\" + = \"\"}\n\t}\n\t// The output stage takes the body (the main logline) and uses + this as the source for the output\n\t// logline. 
In this case, it essentially + turns it into logfmt.\n\tstage.output {\n\t\tsource = \"body\"\n\t}\n\n\tforward_to + = [loki.process.traceslogs.receiver]\n}\n\nloki.process \"traceslogs\" {\n\tstage.tenant + {\n\t\tvalue = \"anonymous\"\n\t}\n\n\tforward_to = argument.logs_forward_to.value\n}\n" kind: ConfigMap metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin - labels: - grafana_dashboard: "1" - name: agent-flow-opentelemetry.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - agent-flow-prometheus-remote-write.json: |- - { - "annotations": { - "list": [ - { - "datasource": "$loki_datasource", - "enable": true, - "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", - "iconColor": "rgba(0, 211, 255, 1)", - "instant": false, - "name": "Deployments", - "titleFormat": "{{cluster}}/{{namespace}}" - } - ] - }, - "graphTooltip": 1, - "links": [ - { - "icon": "doc", - "targetBlank": true, - "title": "Documentation", - "tooltip": "Component documentation", - "type": "link", - "url": "https://grafana.com/docs/agent/latest/flow/reference/components/prometheus.remote_write/" - }, - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "collapsed": false, - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "title": "prometheus.scrape", - "type": "row" - }, - { - "datasource": "${datasource}", - "description": "Percentage of targets successfully scraped by prometheus.scrape\ncomponents.\n\nThis metric is calculated by dividing the number of targets\nsuccessfully scraped by the total number of targets scraped,\nacross all the namespaces in the selected cluster.\n\nLow success rates can indicate a problem with 
scrape targets,\nstale service discovery, or agent misconfiguration.\n", - "fieldConfig": { - "defaults": { - "unit": "percentunit" - } - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 1 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(up{cluster=\"$cluster\"})\n/\ncount (up{cluster=\"$cluster\"})\n", - "instant": false, - "legendFormat": "% of targets successfully scraped", - "range": true - } - ], - "title": "Scrape success rate in $cluster", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Duration of successful scrapes by prometheus.scrape components,\nacross all the namespaces in the selected cluster.\n\nThis metric should be below your configured scrape interval.\nHigh durations can indicate a problem with a scrape target or\na performance issue with the agent.\n", - "fieldConfig": { - "defaults": { - "unit": "s" - } - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 1 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "quantile(0.99, scrape_duration_seconds{cluster=\"$cluster\"})\n", - "instant": false, - "legendFormat": "p99", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "quantile(0.95, scrape_duration_seconds{cluster=\"$cluster\"})\n", - "instant": false, - "legendFormat": "p95", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "quantile(0.50, scrape_duration_seconds{cluster=\"$cluster\"})\n", - "instant": false, - "legendFormat": "p50", - "range": true - } - ], - "title": "Scrape duration in $cluster", - "type": "timeseries" - }, - { - "collapsed": false, - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 11 - }, - "title": "prometheus.remote_write", - "type": "row" - }, - { - "datasource": "${datasource}", - "description": "How far behind prometheus.remote_write from samples recently written\nto the WAL.\n\nEach endpoint prometheus.remote_write is configured to send metrics\nhas its own 
delay. The time shown here is the sum across all\nendpoints for the given component.\n\nIt is normal for the WAL delay to be within 1-3 scrape intervals. If\nthe WAL delay continues to increase beyond that amount, try\nincreasing the number of maximum shards.\n", - "fieldConfig": { - "defaults": { - "unit": "s" - } - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 0, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (instance, component_id) (\n prometheus_remote_storage_highest_timestamp_in_seconds{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\"}\n - ignoring(url, remote_name) group_right(instance)\n prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "WAL delay", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate of data containing samples and metadata sent by\nprometheus.remote_write.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 20, - "gradientMode": "hue", - "stacking": { - "mode": "normal" - } - }, - "unit": "Bps" - } - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 6, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum without (remote_name, url) (\n rate(prometheus_remote_storage_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval]) +\n rate(prometheus_remote_storage_metadata_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "Data write 
throughput", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Latency of writes to the remote system made by\nprometheus.remote_write.\n", - "fieldConfig": { - "defaults": { - "unit": "s" - } - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 12, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "histogram_quantile(0.99, sum by (le) (\n rate(prometheus_remote_storage_sent_batch_duration_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n))\n", - "instant": false, - "legendFormat": "99th percentile", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "histogram_quantile(0.50, sum by (le) (\n rate(prometheus_remote_storage_sent_batch_duration_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n))\n", - "instant": false, - "legendFormat": "50th percentile", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "sum(rate(prometheus_remote_storage_sent_batch_duration_seconds_sum{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\", component_id=~\"$component\"}[$__rate_interval])) /\nsum(rate(prometheus_remote_storage_sent_batch_duration_seconds_count{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\", component_id=~\"$component\"}[$__rate_interval]))\n", - "instant": false, - "legendFormat": "Average", - "range": true - } - ], - "title": "Write latency", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Total number of shards which are concurrently sending samples read\nfrom the Write-Ahead Log.\n\nShards are bound to a minimum and maximum, displayed on the graph.\nThe lowest minimum and the highest maximum across all clients is\nshown.\n\nEach client has its own set of shards, minimum shards, and maximum\nshards; 
filter to a specific URL to display more granular\ninformation.\n", - "fieldConfig": { - "defaults": { - "unit": "none" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Minimum" - }, - "properties": [ - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 15 - ], - "fill": "dash" - } - }, - { - "id": "custom.showPoints", - "value": "never" - }, - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": false, - "viz": false - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Maximum" - }, - "properties": [ - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 15 - ], - "fill": "dash" - } - }, - { - "id": "custom.showPoints", - "value": "never" - }, - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": false, - "viz": false - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 18, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum without (remote_name, url) (\n prometheus_remote_storage_shards{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "min (\n prometheus_remote_storage_shards_min{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}\n)\n", - "instant": false, - "legendFormat": "Minimum", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "max (\n prometheus_remote_storage_shards_max{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}\n)\n", - "instant": false, - "legendFormat": "Maximum", - "range": true - } - ], - "title": "Shards", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Total outgoing samples sent by 
prometheus.remote_write.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 20, - "gradientMode": "hue", - "stacking": { - "mode": "normal" - } - }, - "unit": "cps" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 22 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum without (url, remote_name) (\n rate(prometheus_remote_storage_samples_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "Sent samples / second", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate of samples which prometheus.remote_write could not send due to\nnon-recoverable errors.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 20, - "gradientMode": "hue", - "stacking": { - "mode": "normal" - } - }, - "unit": "cps" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 22 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum without (url,remote_name) (\n rate(prometheus_remote_storage_samples_failed_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "Failed samples / second", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate of samples which prometheus.remote_write attempted to resend\nafter receiving a recoverable error.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 20, - "gradientMode": "hue", - "stacking": { - "mode": "normal" - } - }, - "unit": "cps" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 22 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum 
without (url,remote_name) (\n rate(prometheus_remote_storage_samples_retried_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "Retried samples / second", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Total number of active series across all components.\n\nAn \"active series\" is a series that prometheus.remote_write recently\nreceived a sample for. Active series are garbage collected whenever a\ntruncation of the WAL occurs.\n", - "fieldConfig": { - "defaults": { - "unit": "short" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 32 - }, - "options": { - "legend": { - "showLegend": false - } - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(agent_wal_storage_active_series{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"})\n", - "instant": false, - "legendFormat": "Series", - "range": true - } - ], - "title": "Active series (total)", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Total number of active series which are currently being tracked by\nprometheus.remote_write components, with separate lines for each agent instance.\n\nAn \"active series\" is a series that prometheus.remote_write recently\nreceived a sample for. 
Active series are garbage collected whenever a\ntruncation of the WAL occurs.\n", - "fieldConfig": { - "defaults": { - "unit": "short" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 32 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "agent_wal_storage_active_series{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id!=\"\", component_id=~\"$component\", url=~\"$url\"}\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "Active series (by instance/component)", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Total number of active series which are currently being tracked by\nprometheus.remote_write components, aggregated across all instances.\n\nAn \"active series\" is a series that prometheus.remote_write recently\nreceived a sample for. Active series are garbage collected whenever a\ntruncation of the WAL occurs.\n", - "fieldConfig": { - "defaults": { - "unit": "short" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 32 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (component_id) (agent_wal_storage_active_series{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id!=\"\", component_id=~\"$component\", url=~\"$url\"})\n", - "instant": false, - "legendFormat": "{{component_id}}", - "range": true - } - ], - "title": "Active series (by component)", - "type": "timeseries" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": 
"cluster", - "query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": "namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "allValue": ".*", - "datasource": "${datasource}", - "includeAll": true, - "label": "instance", - "multi": true, - "name": "instance", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n", - "refId": "instance" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "allValue": ".*", - "datasource": "${datasource}", - "includeAll": true, - "label": "component", - "multi": true, - "name": "component", - "query": { - "query": "label_values(agent_wal_samples_appended_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"prometheus\\\\.remote_write\\\\..*\"}, component_id)\n", - "refId": "component" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "allValue": ".*", - "datasource": "${datasource}", - "includeAll": true, - "label": "url", - "multi": true, - "name": "url", - "query": { - "query": "label_values(prometheus_remote_storage_sent_batch_duration_seconds_sum{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\"}, url)\n", - "refId": "url" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - 
"timezone": "utc", - "title": "Grafana Agent Flow / Prometheus Components", - "uid": "ee34ffa2d084547d650e1d96a26306aa" - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin - labels: - grafana_dashboard: "1" - name: agent-flow-prometheus-remote-write.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - agent-flow-resources.json: |- - { - "annotations": { - "list": [ - { - "datasource": "$loki_datasource", - "enable": true, - "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", - "iconColor": "rgba(0, 211, 255, 1)", - "instant": false, - "name": "Deployments", - "titleFormat": "{{cluster}}/{{namespace}}" - } - ] - }, - "graphTooltip": 1, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "${datasource}", - "description": "CPU usage of the Grafana Agent process relative to 1 CPU core.\n\nFor example, 100% means using one entire CPU core.\n", - "fieldConfig": { - "defaults": { - "unit": "percentunit" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(agent_resources_process_cpu_seconds_total{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[$__rate_interval])", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "CPU usage", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Resident memory size of the Grafana Agent process.\n", - "fieldConfig": { - "defaults": { - "unit": "decbytes" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": 
"agent_resources_process_resident_memory_bytes{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "Memory (RSS)", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate at which the Grafana Agent process performs garbage collections.\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "points", - "pointSize": 3 - }, - "unit": "ops" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 8 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(go_gc_duration_seconds_count{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[5m])\nand on(instance)\nagent_build_info{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\n", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "Garbage collections", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Number of goroutines which are running in parallel. 
An infinitely\ngrowing number of these indicates a goroutine leak.\n", - "fieldConfig": { - "defaults": { - "unit": "none" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 8 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "go_goroutines{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\nand on(instance)\nagent_build_info{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\n", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "Goroutines", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Heap memory currently in use by the Grafana Agent process.\n", - "fieldConfig": { - "defaults": { - "unit": "decbytes" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 8 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\nand on(instance)\nagent_build_info{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\n", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "Memory (heap inuse)", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate of data received across all network interfaces for the machine\nGrafana Agent is running on.\n\nData shown here is across all running processes and not exclusive to\nthe running Grafana Agent process.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 30, - "gradientMode": "none", - "stacking": { - "mode": "normal" - } - }, - "unit": "Bps" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 16 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(agent_resources_machine_rx_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{instance}}", - "range": 
true - } - ], - "title": "Network receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate of data sent across all network interfaces for the machine\nGrafana Agent is running on.\n\nData shown here is across all running processes and not exclusive to\nthe running Grafana Agent process.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 30, - "gradientMode": "none", - "stacking": { - "mode": "normal" - } - }, - "unit": "Bps" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 16 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(agent_resources_machine_tx_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "Network send bandwidth", - "type": "timeseries" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": "cluster", - "query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": "namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "allValue": ".*", - "datasource": "${datasource}", - "includeAll": true, - "label": "instance", - "multi": true, - "name": "instance", - 
"query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n", - "refId": "instance" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - "timezone": "utc", - "title": "Grafana Agent Flow / Resources", - "uid": "d47aae5c53be5550f8e3bc8a904ba61a" - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin - labels: - grafana_dashboard: "1" - name: agent-flow-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - go-runtime.json: |- - { - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "description": "Go runtime metrics", - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "id": 14, - "iteration": 1623758038990, - "links": [ ], - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average total bytes of memory reserved across all process instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 0 - }, - "hiddenSeries": false, - "id": 16, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - 
"pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by(job)(go_memstats_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}} (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Total Reserved Memory", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average stack memory usage across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 0 - }, - "hiddenSeries": false, - "id": 24, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job) (go_memstats_stack_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": 
"{{job}}: stack inuse (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Stack Memory Use", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average memory reservations by the runtime, not for stack or heap, across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 8 - }, - "hiddenSeries": false, - "id": 26, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(go_memstats_mspan_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{instance}}: mspan (avg)", - "refId": "B" - }, - { - "expr": "avg by (job)(go_memstats_mcache_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{instance}}: mcache (avg)", - "refId": "D" - }, - { - "expr": "avg by 
(job)(go_memstats_buck_hash_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{instance}}: buck hash (avg)", - "refId": "E" - }, - { - "expr": "avg by (job)(go_memstats_gc_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: gc (avg)", - "refId": "F" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Other Memory Reservations", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average memory reserved, and actually in use, by the heap, across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 8 - }, - "hiddenSeries": false, - "id": 12, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(go_memstats_heap_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - 
"legendFormat": "{{job}}: heap reserved (avg)", - "refId": "B" - }, - { - "expr": "avg by (job)(go_memstats_heap_inuse_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: heap in use (avg)", - "refId": "A" - }, - { - "expr": "avg by (job)(go_memstats_heap_alloc_bytes{job=~\"tns_app\",instance=~\".*\"})", - "interval": "", - "legendFormat": "{{job}}: heap alloc (avg)", - "refId": "C" - }, - { - "expr": "avg by (job)(go_memstats_heap_idle_bytes{job=~\"tns_app\",instance=~\".*\"})", - "interval": "", - "legendFormat": "{{job}}: heap idle (avg)", - "refId": "D" - }, - { - "expr": "avg by (job)(go_memstats_heap_released_bytes{job=~\"tns_app\",instance=~\".*\"})", - "interval": "", - "legendFormat": "{{job}}: heap released (avg)", - "refId": "E" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Heap Memory", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average allocation rate in bytes per second, across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 16 - }, - "hiddenSeries": false, - "id": 14, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - 
"linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 1, - "points": true, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(rate(go_memstats_alloc_bytes_total{job=\"$job\", instance=~\"$instance\"}[$__rate_interval]))", - "interval": "", - "legendFormat": "{{job}}: bytes malloced/s (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Allocation Rate, Bytes", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average rate of heap object allocation, across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 16 - }, - "hiddenSeries": false, - "id": 20, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - 
"stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(go_memstats_mallocs_total{job=\"$job\", instance=~\"$instance\"}[$__rate_interval])", - "interval": "", - "legendFormat": "{{job}}: obj mallocs/s (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Heap Object Allocation Rate", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average number of live memory objects across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 24 - }, - "hiddenSeries": false, - "id": 22, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by(job)(go_memstats_mallocs_total{job=\"$job\", instance=~\"$instance\"} - go_memstats_frees_total{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - 
"legendFormat": "{{job}}: object count (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Number of Live Objects", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average number of goroutines across instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 24 - }, - "hiddenSeries": false, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(go_goroutines{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: goroutine count (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Goroutines", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": 
"time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 32 - }, - "hiddenSeries": false, - "id": 4, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(go_gc_duration_seconds{quantile=\"0\", job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: min gc time (avg)", - "refId": "A" - }, - { - "expr": "avg by (job)(go_gc_duration_seconds{quantile=\"1\", job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: max gc time (avg)", - "refId": "B" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "GC min & max duration", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - 
"min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "The number used bytes at which the runtime plans to perform the next GC, averaged across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 32 - }, - "hiddenSeries": false, - "id": 27, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(go_memstats_next_gc_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}} next gc bytes (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Next GC, Bytes", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "refresh": "30s", - "schemaVersion": 30, - "style": 
"dark", - "tags": [ - "go-runtime" - ], - "templating": { - "list": [ - { - "current": { - "selected": false, - "text": "MONITORING", - "value": "MONITORING" - }, - "description": null, - "error": null, - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "datasource", - "options": [ ], - "query": "prometheus", - "queryValue": "", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" - }, - { - "allValue": null, - "current": { - "selected": false, - "text": "pilot", - "value": "pilot" - }, - "datasource": "$datasource", - "definition": "label_values(go_info, job)", - "description": null, - "error": null, - "hide": 0, - "includeAll": false, - "label": "job", - "multi": false, - "name": "job", - "options": [ ], - "query": { - "query": "label_values(go_info, job)", - "refId": "MONITORING-job-Variable-Query" - }, - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": "", - "current": { - "selected": false, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "definition": "label_values(go_info{job=\"$job\"}, instance)", - "description": null, - "error": null, - "hide": 0, - "includeAll": true, - "label": "instance", - "multi": true, - "name": "instance", - "options": [ ], - "query": { - "query": "label_values(go_info{job=\"$job\"}, instance)", - "refId": "MONITORING-instance-Variable-Query" - }, - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-30m", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Go runtime 
metrics", - "uid": "T4sSTLBGzgp", - "version": 1 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Go - Runtime - labels: - grafana_dashboard: "1" - name: go-runtime.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - datasources.yaml: | - apiVersion: 1 - - deleteDatasources: - - name: Metrics - uid: metrics - - name: Traces - uid: traces - - datasources: - # Mimir for metrics - - name: Metrics - type: prometheus - uid: metrics - access: proxy - orgId: 1 - url: http://nginx.gateway.svc.cluster.local:8080/prometheus - basicAuth: false - isDefault: false - version: 1 - editable: true - jsonData: - prometheusType: Mimir - exemplarTraceIdDestinations: - - name: traceID - datasourceUid: traces - - # Tempo for traces - - name: Traces - type: tempo - access: proxy - uid: traces - url: http://nginx.gateway.svc.cluster.local:3200 - basicAuth: false - isDefault: true - version: 1 - editable: true - jsonData: - search: - hide: false - nodeGraph: - enabled: true - serviceMap: - datasourceUid: metrics - traceQuery: - timeShiftEnabled: true - spanStartTimeShift: '-30m' - spanEndTimeShift: '30m' - spanBar: - type: 'Tag' - tag: 'http.path' - tracesToMetrics: - datasourceUid: metrics - spanStartTimeShift: '-30m' - spanEndTimeShift: '30m' - tags: [{ key: 'service.name', value: 'service' }, { key: 'span_name' }, { key: 'http_method' }] - queries: - - name: '(R) Rate' - query: 'sum(rate(traces_spanmetrics_calls_total{$$__tags}[$$__rate_interval]))' - - name: '(E) Error Rate' - query: 'sum(rate(traces_spanmetrics_calls_total{$$__tags, status_code="STATUS_CODE_ERROR"}[$$__rate_interval]))' - - name: '(D) Duration' - query: 'histogram_quantile(0.9, sum(rate(traces_spanmetrics_latency_bucket{$$__tags}[$$__rate_interval])) by (le))' -kind: ConfigMap -metadata: - labels: - grafana_datasource: "1" - name: grafana-datasources-k2hbd65tcb - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-alertmanager-resources.json: |- - { - 
"__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": 
"min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": 
"min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alertmanager", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - 
"legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?alertmanager.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?alertmanager.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": 
true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"alertmanager\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"alertmanager\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk reads", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Disk", - "titleSize": "h6" - }, - { - "collapse": false, - "height": 
"250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(alertmanager).*\"\n }\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - 
"tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Alertmanager resources", - "uid": "a6883fb22799ac74479c7db872451092", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-alertmanager-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-alertmanager.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "100px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 1, - "legend": { - "avg": false, - "current": false, - 
"max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cluster_job_pod:cortex_alertmanager_alerts:sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Total alerts", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cluster_job_pod:cortex_alertmanager_silences:sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": 
null, - "timeShift": null, - "title": "Total silences", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max(cortex_alertmanager_tenants_discovered{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Tenants", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": 
false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } 
- ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "QPS", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th 
percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alertmanager Distributor", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "sum(cluster_job:cortex_alertmanager_alerts_received_total:rate5m{cluster=~\"$cluster\", 
job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n-\nsum(cluster_job:cortex_alertmanager_alerts_invalid_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(cluster_job:cortex_alertmanager_alerts_invalid_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "APS", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alerts received", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "cortex_alertmanager_dispatcher_aggregation_groups{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "per pod Active Aggregation Groups", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alerts grouping", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - 
"custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(cluster_job_integration:cortex_alertmanager_notifications_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n-\nsum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "NPS", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - 
"overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "(\nsum(cluster_job_integration:cortex_alertmanager_notifications_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}) by(integration)\n-\nsum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}) by(integration)\n) > 0\nor on () vector(0)\n", - "format": "time_series", - "legendFormat": "success - {{ integration }}", - "legendLink": null - }, - { - "expr": "sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}) by(integration)", - "format": "time_series", - "legendFormat": "failed - {{ integration }}", - "legendLink": null - } - ], - "title": "NPS by integration", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_alertmanager_notification_latency_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": 
"A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_alertmanager_notification_latency_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_alertmanager_notification_latency_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_alertmanager_notification_latency_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alert notifications", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"alertmanager-storage\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Error rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { 
- "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - 
"legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alertmanager Configuration Object Store (Alertmanager accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - 
"targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Get", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } 
- }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: GetRange", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - 
"tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Upload", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - 
"legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - 
"group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (cortex_alertmanager_tenants_owned{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Per pod tenants", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (cluster_job_pod:cortex_alertmanager_alerts:sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Per pod alerts", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { 
- "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (cluster_job_pod:cortex_alertmanager_silences:sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Per pod silences", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Replication", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_alertmanager_sync_configs_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_alertmanager_sync_configs_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": 
"sum(rate(cortex_alertmanager_sync_configs_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Syncs/sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(reason) (rate(cortex_alertmanager_sync_configs_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{reason}}", - "legendLink": null - } - ], - "title": "Syncs/sec (by reason)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum (rate(cortex_alertmanager_ring_check_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": 
"errors", - "legendLink": null - } - ], - "title": "Ring check errors/sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Tenant configuration sync", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(outcome) (rate(cortex_alertmanager_state_initial_sync_completed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "interval": "1m", - "legendFormat": "{{outcome}}", - "legendLink": null - } - ], - "title": "Initial syncs /sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 26, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "interval": "1m", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "interval": "1m", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "interval": "1m", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Initial sync duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": 
"color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 27, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_alertmanager_state_fetch_replica_state_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_alertmanager_state_fetch_replica_state_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "interval": "1m", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_alertmanager_state_fetch_replica_state_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "interval": "1m", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Fetch state from other alertmanagers /sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Sharding initial state sync", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { 
- "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 28, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(cluster_job:cortex_alertmanager_state_replication_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n-\nsum(cluster_job:cortex_alertmanager_state_replication_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(cluster_job:cortex_alertmanager_state_replication_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Replicate state to other alertmanagers /sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 29, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": 
"sum(cluster_job:cortex_alertmanager_partial_state_merges_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n-\nsum(cluster_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(cluster_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Merge state from other alertmanagers /sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 30, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_alertmanager_state_persist_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_alertmanager_state_persist_failed_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_alertmanager_state_persist_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Persist state to remote storage /sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Sharding runtime state sync", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, 
- "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Alertmanager", - "uid": "b0d38d318bbddd80476246d4930f9e55", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-alertmanager.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-compactor-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": 
"fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - 
} - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "CPU and memory", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": 
"request", - "legendLink": null - } - ], - "title": "Memory (RSS)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - } - ], 
- "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?compactor.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?compactor.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": 
null, - "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"compactor\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() 
(\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"compactor\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk reads", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(compactor).*\"\n }\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Disk", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - 
"query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Compactor resources", - "uid": "09a5c49e9cdb2f2b24c6d184574a07fd", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-compactor-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-compactor.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, 
- "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Per-instance runs\nNumber of times a compactor instance triggers a compaction across all tenants that it manages.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "bars", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "completed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "started" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#34CCEB", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_compactor_runs_started_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "started", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_compactor_runs_completed_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "completed", - "legendLink": null - }, - { - "expr": 
"sum(rate(cortex_compactor_runs_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Per-instance runs / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Tenants compaction progress\nIn a multi-tenant cluster, display the progress of tenants that are compacted while compaction is running.\n\n", - "fieldConfig": { - "defaults": { - "max": 1, - "noValue": 1, - "unit": "percentunit" - } - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "(\n cortex_compactor_tenants_processing_succeeded{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"} +\n cortex_compactor_tenants_processing_failed{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"} +\n cortex_compactor_tenants_skipped{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}\n)\n/\ncortex_compactor_tenants_discovered{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"} > 0\n", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Tenants compaction progress", - "type": "timeseries" - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Longest time since last successful run\nDisplays the amount of time since the most recent successful execution\nof the compactor.\nThe value shown will be for the compactor replica that has the longest time since its\nlast successful run.\nThe table to the right shows a summary for all compactor replicas.\n\nIf there is no time value, one of the following messages 
might appear:\n\n- If you see \"No compactor data\" in this panel, that means that no compactors are active yet.\n\n- If you see \"No successful runs\" in this panel, that means that compactors are active, but none\n of them were successfully executed yet.\n\nThese might be expected - for example, if you just recently restarted your compactors,\nthey might not have had a chance to complete their first compaction run.\nHowever, if these messages persist, you should check the health of your compactors.\n\n", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "No compactor data", - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Last run" - }, - "properties": [ - { - "id": "custom.width", - "value": 74 - }, - { - "id": "mappings", - "value": [ - { - "options": { - "from": "-Infinity", - "result": { - "color": "text", - "text": "No successful runs since startup yet" - }, - "to": 0 - }, - "type": "range" - } - ] - }, - { - "id": "color", - "value": { - "mode": "thresholds" - } - }, - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "yellow", - "value": 7200 - }, - { - "color": "orange", - "value": 21600 - }, - { - "color": "red", - "value": 43200 - } - ] - } - } - ] - } - ] - }, - "fill": 1, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "reduceOptions": { - "calcs": [ - "first" - ], - "fields": "/^Last run$/", - "values": false - }, - "textMode": "value" - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": 
false, - "targets": [ - { - "expr": "max by(pod)\n(\n (time() * (max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[1h]) !=bool 0))\n -\n max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[1h])\n)\n", - "format": "table", - "instant": true, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Longest time since last successful run", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transformations": [ - { - "id": "organize", - "options": { - "renameByName": { - "Value": "Last run", - "pod": "Compactor" - } - } - }, - { - "id": "sortBy", - "options": { - "sort": [ - { - "desc": true, - "field": "Last run" - } - ] - } - } - ], - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Last successful run per-compactor replica\nDisplays the compactor replicas, and for each, shows how long it has been since\nits last successful compaction run.\n\nThe value in the status column is based on how long it has been since the last successful compaction.\n\n- Okay: less than 2 hours\n- Delayed: more than 2 hours\n- Late: more than 6 hours\n- Very late: more than 12 hours\n\nIf the status of any compactor replicas are *Late* or *Very late*, check their health.\n\n", - "fieldConfig": { - "overrides": [ - { - "matcher": { - "id": "byName", - "options": 
"Status" - }, - "properties": [ - { - "id": "custom.displayMode", - "value": "color-background" - }, - { - "id": "mappings", - "value": [ - { - "options": { - "from": "-Infinity", - "result": { - "color": "transparent", - "text": "N/A" - }, - "to": 0 - }, - "type": "range" - }, - { - "options": { - "from": 0, - "result": { - "color": "green", - "text": "Ok" - }, - "to": 7200 - }, - "type": "range" - }, - { - "options": { - "from": 7200, - "result": { - "color": "yellow", - "text": "Delayed" - }, - "to": 21600 - }, - "type": "range" - }, - { - "options": { - "from": 21600, - "result": { - "color": "orange", - "text": "Late" - }, - "to": 43200 - }, - "type": "range" - }, - { - "options": { - "from": 43200, - "result": { - "color": "red", - "text": "Very late" - }, - "to": "Infinity" - }, - "type": "range" - }, - { - "options": { - "match": "null+nan", - "result": { - "color": "transparent", - "text": "Unknown" - } - }, - "type": "special" - } - ] - }, - { - "id": "custom.width", - "value": 86 - }, - { - "id": "custom.align", - "value": "center" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Last run" - }, - "properties": [ - { - "id": "unit", - "value": "s" - }, - { - "id": "custom.width", - "value": 74 - }, - { - "id": "mappings", - "value": [ - { - "options": { - "from": "-Infinity", - "result": { - "text": "Never" - }, - "to": 0 - }, - "type": "range" - } - ] - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "max by(pod)\n(\n (time() * (max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[1h]) !=bool 0))\n -\n max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[1h])\n)\n", - 
"format": "table", - "instant": true, - "legendFormat": "Last run", - "legendLink": null - } - ], - "title": "Last successful run per-compactor replica", - "transformations": [ - { - "id": "organize", - "options": { - "renameByName": { - "Value": "Last run", - "pod": "Compactor" - } - } - }, - { - "id": "sortBy", - "options": { - "sort": [ - { - "desc": true, - "field": "Last run" - } - ] - } - }, - { - "id": "calculateField", - "options": { - "alias": "One", - "binary": { - "left": "Last run", - "operator": "/", - "right": "Last run" - }, - "mode": "binary", - "replaceFields": false - } - }, - { - "id": "calculateField", - "options": { - "alias": "Status", - "binary": { - "left": "Last run", - "operator": "*", - "right": "One" - }, - "mode": "binary", - "replaceFields": false - } - }, - { - "id": "filterFieldsByName", - "options": { - "include": { - "names": [ - "Compactor", - "Last run", - "Status" - ] - } - } - } - ], - "type": "table" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Summary", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Estimated Compaction Jobs\nEstimated number of compaction jobs based on latest version of bucket index. Ingesters upload new blocks every 2 hours (shortly after 01:00 UTC, 03:00 UTC, 05:00 UTC, etc.),\nand compactors should process all of them within 2h interval. If this graph regularly goes to zero (or close to zero) in 2 hour intervals, then compaction works as designed.\n\nMetric with number of compaction jobs is computed from blocks in bucket index, which is updated regularly. Metric doesn't change between bucket index updates, even if\nthere were compaction jobs finished in this time. When computing compaction jobs, only jobs that can be executed at given moment are counted. There can be more\njobs, but if they are blocked, they are not counted in the metric. 
For example if there is a split compaction job pending for some time range, no merge job\ncovering the same time range can run. In this case only split compaction job is counted toward the metric, but merge job isn't.\n\nIn other words, computed number of compaction jobs is the minimum number of compaction jobs based on latest version of bucket index.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(cortex_bucket_index_estimated_compaction_jobs{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}) and (sum(rate(cortex_bucket_index_estimated_compaction_jobs_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) == 0)", - "format": "time_series", - "legendFormat": "Jobs", - "legendLink": null - } - ], - "title": "Estimated Compaction Jobs", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### TSDB compactions / sec\nRate of TSDB compactions. 
Single TSDB compaction takes one or more input blocks and produces one or more (during \"split\" phase) output blocks.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(prometheus_tsdb_compactions_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "compactions", - "legendLink": null - } - ], - "title": "TSDB compactions / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### TSDB compaction duration\nDisplay the amount of time that it has taken to run a single TSDB compaction.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(prometheus_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": 
"A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(prometheus_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(prometheus_tsdb_compaction_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(prometheus_tsdb_compaction_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "TSDB compaction duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "avg(max by(user) (cortex_bucket_blocks_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}))", - "format": "time_series", - 
"legendFormat": "avg", - "legendLink": null - } - ], - "title": "Average blocks / tenant", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Tenants with largest number of blocks\nThe 10 tenants with the largest number of blocks.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "topk(10, max by(user) (cortex_bucket_blocks_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "Tenants with largest number of blocks", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_compactor_blocks_marked_for_deletion_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "blocks", - "legendLink": null - } - ], - "title": "Blocks marked for deletion / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_compactor_blocks_cleaned_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_compactor_block_cleanup_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Blocks deletions / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Garbage collector", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - 
"datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_compactor_meta_syncs_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_compactor_meta_sync_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_compactor_meta_sync_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Metadata syncs / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": 
[ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_compactor_meta_sync_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_compactor_meta_sync_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_compactor_meta_sync_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_compactor_meta_sync_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Metadata sync duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Metadata sync", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - 
"showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Error rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - 
"legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "nullPointMode": "null as zero", - 
"options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Object Store", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": 
"none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Get", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 
0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: GetRange", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - 
"min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Upload", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, 
- "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 21, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - 
"drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": 
"single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval])) * 1e3 / 
sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Key-value store for compactors ring", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", 
- "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Compactor", - "uid": "1b3443aea86db629e6efdb7d05c53823", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-compactor.json + name: agent-modules-cf8t5bf7t9 namespace: monitoring-system --- apiVersion: v1 @@ -10470,33175 +872,6 @@ metadata: namespace: monitoring-system --- apiVersion: v1 -data: - mimir-config.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "instances" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "count(cortex_config_hash{cluster=~\"$cluster\", namespace=~\"$namespace\"}) by (sha256)", - "format": "time_series", - "legendFormat": 
"sha256:{{sha256}}", - "legendLink": null - } - ], - "title": "Startup config file hashes", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Startup config file", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "instances" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "count(cortex_runtime_config_hash{cluster=~\"$cluster\", namespace=~\"$namespace\"}) by (sha256)", - "format": "time_series", - "legendFormat": "sha256:{{sha256}}", - "legendLink": null - } - ], - "title": "Runtime config file hashes", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Runtime config file", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - 
"sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Config", - "uid": "5d9d0b4724c0f80d68467088ec61e003", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-config.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-object-store.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, 
- "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(component) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{component}}", - "legendLink": null - } - ], - "title": "RPS / component", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(component) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) / sum by(component) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{component}}", - "legendLink": null - } - ], - "title": "Error rate / component", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Components", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - 
"overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "RPS / operation", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Error rate / operation", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Operations", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": 
"single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: Get", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, 
sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: GetRange", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", 
namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, 
sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", 
namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: Upload", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - 
"legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { 
- "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Object Store", - "uid": "e1324ee2a434f4158c00a9ee279d3292", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-object-store.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-overrides.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "${datasource}", - "id": 1, - "span": 12, - "targets": [ - { - "expr": "max by(limit_name) (cortex_limits_defaults{cluster=~\"$cluster\",namespace=~\"$namespace\"})", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "title": "Defaults", - "transformations": [ - { - "id": "labelsToFields", - "options": { } - }, - { - 
"id": "merge", - "options": { } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true - }, - "indexByName": { - "Value": 1, - "limit_name": 0 - } - } - }, - { - "id": "sortBy", - "options": { - "fields": { }, - "sort": [ - { - "field": "limit_name" - } - ] - } - } - ], - "type": "table" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "${datasource}", - "id": 2, - "span": 12, - "targets": [ - { - "expr": "max by(user, limit_name) (cortex_limits_overrides{cluster=~\"$cluster\",namespace=~\"$namespace\",user=~\"${tenant_id}\"})", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "title": "Per-tenant overrides", - "transformations": [ - { - "id": "labelsToFields", - "options": { - "mode": "columns", - "valueLabel": "limit_name" - } - }, - { - "id": "merge", - "options": { } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true - }, - "indexByName": { - "user": 0 - } - } - } - ], - "type": "table" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", 
- "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "current": { - "selected": true, - "text": ".*", - "value": ".*" - }, - "hide": 0, - "label": "Tenant ID", - "name": "tenant_id", - "options": [ - { - "selected": true, - "text": ".*", - "value": ".*" - } - ], - "query": ".*", - "type": "textbox" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Overrides", - "uid": "1e2c358600ac53f09faea133f811b5bb", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-overrides.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-overview-networking.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": 
"$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - 
"showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", - "format": "time_series", - "legendFormat": "highest", - 
"legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Writes", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - 
} - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ 
] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Reads", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Backend", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": 
"Mimir / Overview networking", - "uid": "e15c71d372cc541367a088f10d9fcd92", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-overview-networking.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-overview-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - 
"stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Writes", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - 
"min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"distributor|ingester|mimir-write\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"distributor|ingester|mimir-write\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - 
"title": "Disk reads", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(distributor|ingester|mimir-write).*\"\n }\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - 
"expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Reads", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max 
by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Backend", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - 
"tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk reads", - "type": 
"timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"\n }\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": 
"label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Overview resources", - "uid": "a9b92d3c4d1af325d872a9e9a7083d71", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-overview-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-overview.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "content": "The 'Status' panel shows an overview on the cluster health over the time.\nTo investigate failures, see a specific 
dashboard:\n\n- Writes\n- Reads\n- Rule evaluations\n- Alerting notifications\n- Object storage\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 3, - "title": "", - "transparent": true, - "type": "text" - }, - { - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#7EB26D", - "value": null - }, - { - "color": "#EAB839", - "value": 0.01 - }, - { - "color": "#E24D42", - "value": 0.050000000000000003 - } - ] - } - } - }, - "id": 2, - "options": { - "showValue": "never" - }, - "span": 6, - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "exemplar": false, - "expr": "(\n # gRPC errors are not tracked as 5xx but \"error\".\n sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"5.*|error\"}[$__rate_interval]))\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", - "instant": false, - "legendFormat": "Writes", - "range": true - }, - { - "datasource": { - "uid": "$datasource" - }, - "exemplar": false, - "expr": "(\n sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"5.*\"}[$__rate_interval]))\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", 
route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", - "instant": false, - "legendFormat": "Reads", - "range": true - }, - { - "datasource": { - "uid": "$datasource" - }, - "exemplar": false, - "expr": "(\n (\n sum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n +\n # Consider missed evaluations as failures.\n sum(rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n )\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "instant": false, - "legendFormat": "Rule evaluations", - "range": true - }, - { - "datasource": { - "uid": "$datasource" - }, - "exemplar": false, - "expr": "(\n # Failed notifications from ruler to Alertmanager (handling the case the ruler metrics are missing).\n ((sum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n) or vector(0))\n +\n # Failed notifications from Alertmanager to receivers (handling the case the alertmanager metrics are missing).\n ((sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n) or vector(0))\n)\n/\n(\n # Total notifications from ruler to Alertmanager (handling the case the ruler metrics are missing).\n ((sum(rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n) or vector(0))\n +\n # Total notifications from Alertmanager to receivers (handling the case the alertmanager metrics 
are missing).\n ((sum(cluster_job_integration:cortex_alertmanager_notifications_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n) or vector(0))\n)\n", - "instant": false, - "legendFormat": "Alerting notifications", - "range": true - }, - { - "datasource": { - "uid": "$datasource" - }, - "exemplar": false, - "expr": "sum(rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n/\nsum(rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n", - "instant": false, - "legendFormat": "Object storage", - "range": true - } - ], - "title": "Status", - "type": "state-timeline" - }, - { - "id": 3, - "options": { - "alertInstanceLabelFilter": "cluster=~\"$cluster\", namespace=~\"$namespace\"", - "alertName": "Mimir", - "dashboardAlerts": false, - "maxItems": 100, - "sortOrder": 3, - "stateFilter": { - "error": true, - "firing": true, - "noData": false, - "normal": false, - "pending": false - } - }, - "span": 3, - "title": "Firing alerts", - "type": "alertlist" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Mimir cluster health", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "content": "These panels show an overview on the write path. 
\nTo examine the write path in detail, see a specific dashboard:\n\n- Writes\n- Writes resources\n- Writes networking\n- Overview resources\n- Overview networking\n", - "datasource": null, - "description": "", - "id": 4, - "mode": "markdown", - "span": 3, - "title": "", - "transparent": true, - "type": "text" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Write requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th 
percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Write latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "cps" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_samples:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "samples / sec", - "legendLink": null - }, - { - "expr": 
"sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "exemplars / sec", - "legendLink": null - } - ], - "title": "Ingestion / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Writes", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "content": "These panels show an overview on the read path. \nTo examine the read path in detail, see a specific dashboard:\n\n- Reads\n- Reads resources\n- Reads networking\n- Overview resources\n- Overview networking\n- Queries\n- Compactor\n", - "datasource": null, - "description": "", - "id": 8, - "mode": "markdown", - "span": 3, - "title": "", - "transparent": true, - "type": "text" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - 
}, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Read requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 10, - 
"links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Read latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, 
- "seriesOverrides": [ - { - "alias": "instant queries", - "color": "#429D48" - }, - { - "alias": "range queries", - "color": "#F1C731" - }, - { - "alias": "\"label names\" queries", - "color": "#2A66CF" - }, - { - "alias": "\"label values\" queries", - "color": "#9E44C1" - }, - { - "alias": "series queries", - "color": "#FFAB57" - }, - { - "alias": "remote read queries", - "color": "#C79424" - }, - { - "alias": "metadata queries", - "color": "#84D586" - }, - { - "alias": "exemplar queries", - "color": "#A1C4FC" - }, - { - "alias": "\"active series\" queries", - "color": "#C788DE" - } - ], - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_query\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "instant queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_query_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "range queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_labels\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "\"label names\" queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_label_name_values\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "\"label values\" queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_series\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "series queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_read\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "remote read queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_metadata\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "metadata queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_query_exemplars\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "exemplar queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=\"prometheus_api_v1_cardinality_active_series\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "\"active series\" queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=\"prometheus_api_v1_cardinality_label_names\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "\"label name cardinality\" queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=\"prometheus_api_v1_cardinality_label_values\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "\"label value cardinality\" queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_.*\",route!~\".*(query|query_range|label.*|series|read|metadata|query_exemplars|cardinality_.*)\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "other", - "legendLink": null - } - ], - "title": "Queries / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Reads", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "content": "These panels show an overview on the recording and alerting rules evaluation.\nTo examine the rules evaluation and alerts notifications in detail, see a specific dashboard:\n\n- Ruler\n- Alertmanager\n- Alertmanager resources\n- Overview resources\n- Overview networking\n", - "datasource": null, - "description": "", - "id": 12, - "mode": "markdown", - "span": 3, - "title": "", - "transparent": true, - "type": "text" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - 
"id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "success", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "missed", - "legendLink": null - } - ], - "title": "Rule evaluations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum (rate(cortex_prometheus_rule_evaluation_duration_seconds_sum{cluster=~\"$cluster\", 
job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum (rate(cortex_prometheus_rule_evaluation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "average", - "legendLink": null - } - ], - "title": "Rule evaluations latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n -\nsum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "failed", - 
"legendLink": null - } - ], - "title": "Alerting notifications sent to Alertmanager / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Recording and alerting rules", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "content": "These panels show an overview on the long-term storage (object storage).\nTo examine the storage in detail, see a specific dashboard:\n\n- Object store\n- Compactor\n", - "datasource": null, - "description": "", - "id": 16, - "mode": "markdown", - "span": 3, - "title": "", - "transparent": true, - "type": "text" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n-\nsum(rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": 
"sum(rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "seriesOverrides": [ - { - "alias": "attributes", - "color": "#429D48" - }, - { - "alias": "delete", - "color": "#F1C731" - }, - { - "alias": "exists", - "color": "#2A66CF" - }, - { - "alias": "get", - "color": "#9E44C1" - }, - { - "alias": "get_range", - "color": "#FFAB57" - }, - { - "alias": "iter", - "color": "#C79424" - }, - { - "alias": "upload", - "color": "#84D586" - } - ], - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - 
"tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(max by(user) (max_over_time(cortex_bucket_blocks_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[15m])))", - "format": "time_series", - "legendFormat": "blocks", - "legendLink": null - } - ], - "title": "Total number of blocks in the storage", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Long-term storage (object storage)", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", 
- "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Overview", - "uid": "ffcd83628d7d4b5a03d1cafd159e6c9c", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-overview.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-queries.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, 
sum(rate(cortex_query_frontend_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_query_frontend_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_frontend_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Queue duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_query_frontend_retries_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, 
sum(rate(cortex_query_frontend_retries_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_query_frontend_retries_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_query_frontend_retries_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Retries", - "type": "timeseries", - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (cortex_query_frontend_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Queue length (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - 
"showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(user) (cortex_query_frontend_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}) > 0", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "Queue length (per user)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Queue duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Queue length (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": 
"A", - "mode": "none" - } - }, - "min": 0, - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(user) (cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}) > 0", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "Queue length (per user)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Intervals per query\nThe average number of split queries (partitioned by time) executed a single input query.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_frontend_split_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) / sum(rate(cortex_frontend_query_range_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", method=\"split_by_interval_and_results_cache\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": 
"splitting rate", - "legendLink": null - } - ], - "title": "Intervals per query", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "# Query the new metric introduced in Mimir 2.10.\n(\n sum by(request_type) (rate(cortex_frontend_query_result_cache_hits_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))\n /\n sum by(request_type) (rate(cortex_frontend_query_result_cache_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))\n)\n# Otherwise fallback to the previous general-purpose metrics.\nor\n(\n label_replace(\n # Query metrics before and after dskit cache refactor.\n sum (\n rate(thanos_cache_memcached_hits_total{name=\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_hits_total{name=\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n )\n /\n sum (\n rate(thanos_cache_memcached_requests_total{name=~\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_requests_total{name=~\"frontend-cache\", cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n ),\n \"request_type\", \"query_range\", \"\", \"\")\n)\n", - "format": "time_series", - "legendFormat": "{{request_type}}", - "legendLink": null - } - ], - "title": "Query results cache hit ratio", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Query results cache skipped\nThe % of queries whose results could not be cached.\nIt is tracked for each split query when the splitting by interval is enabled.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_frontend_query_result_cache_skipped_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (reason) /\nignoring (reason) group_left sum(rate(cortex_frontend_query_result_cache_attempted_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "{{reason}}", - "legendLink": null - } - ], - "title": "Query results cache skipped", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend - query splitting and results cache", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Sharded queries ratio\nThe % of queries that 
have been successfully rewritten and executed in a shardable way.\nThis panel only takes into account the type of queries that are supported by query sharding (eg. range queries).\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_frontend_query_sharding_rewrites_succeeded_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) /\nsum(rate(cortex_frontend_query_sharding_rewrites_attempted_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "sharded queries ratio", - "legendLink": null - } - ], - "title": "Sharded queries ratio", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Number of sharded queries per query\nThe number of sharded queries that have been executed for a single input query. 
It only tracks queries that\nhave been successfully rewritten in a shardable way.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_frontend_sharded_queries_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_frontend_sharded_queries_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_frontend_sharded_queries_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_frontend_sharded_queries_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Number of sharded queries per query", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": 
false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend - query sharding", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:cortex_ingester_queried_series_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job:cortex_ingester_queried_series_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1 * sum(cluster_job:cortex_ingester_queried_series_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}) / sum(cluster_job:cortex_ingester_queried_series_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Series per query", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - 
"fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:cortex_ingester_queried_samples_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job:cortex_ingester_queried_samples_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1 * sum(cluster_job:cortex_ingester_queried_samples_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}) / sum(cluster_job:cortex_ingester_queried_samples_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Samples per query", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "nullPointMode": "null as zero", - "options": 
{ - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:cortex_ingester_queried_exemplars_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job:cortex_ingester_queried_exemplars_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1 * sum(cluster_job:cortex_ingester_queried_exemplars_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}) / sum(cluster_job:cortex_ingester_queried_exemplars_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Exemplars per query", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - 
"expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_querier_storegateway_instances_hit_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_instances_hit_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Number of store-gateways hit per query", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": 
"histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_querier_storegateway_refetches_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_refetches_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Refetches of missing blocks per query", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Consistency checks failed\nRate of queries that had to run with consistency checks and those checks failed. 
A failed consistency check means that some of at least one block which had to be queried wasn't present in any of the store-gateways.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Failure Rate" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_querier_blocks_consistency_checks_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) / sum(rate(cortex_querier_blocks_consistency_checks_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Failure Rate", - "legendLink": null - } - ], - "title": "Consistency checks failed", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Rejected queries\nThe proportion of all queries received by queriers that were rejected for some reason.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - 
"tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_querier_queries_rejected_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) / ignoring (reason) group_left sum(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_query(_range)?\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{reason}}", - "legendLink": null - } - ], - "title": "Rejected queries", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max(cortex_bucket_index_loaded{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"})", - "format": "time_series", - "legendFormat": "Max", - "legendLink": null - }, - { - "expr": "min(cortex_bucket_index_loaded{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"})", - "format": "time_series", - "legendFormat": "Min", - "legendLink": null - }, - { - "expr": "avg(cortex_bucket_index_loaded{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"})", - "format": 
"time_series", - "legendFormat": "Average", - "legendLink": null - } - ], - "title": "Bucket indexes loaded (per querier)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_bucket_index_loads_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) - sum(rate(cortex_bucket_index_load_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_bucket_index_load_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Bucket indexes load / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - 
"spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_bucket_index_load_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_bucket_index_load_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_bucket_index_load_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_bucket_index_load_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Bucket indexes load latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": 
{ - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_bucket_store_series_blocks_queried_sum{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "blocks", - "legendLink": null - } - ], - "title": "Blocks queried / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "binBps" - }, - "overrides": [ ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(data_type) (\n # Exclude \"chunks refetched\".\n rate(cortex_bucket_store_series_data_size_fetched_bytes_sum{component=\"store-gateway\", stage!=\"refetched\", cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "{{data_type}}", - "legendLink": null - } - ], - "title": "Data fetched / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 
100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "binBps" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(data_type) (\n # Exclude \"chunks processed\" to only count \"chunks returned\", other than postings and series.\n rate(cortex_bucket_store_series_data_size_touched_bytes_sum{component=\"store-gateway\", stage!=\"processed\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "{{data_type}}", - "legendLink": null - } - ], - "title": "Data touched / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Store-gateway", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 26, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(stage) (rate(cortex_bucket_store_series_request_stage_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum by(stage) 
(rate(cortex_bucket_store_series_request_stage_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "{{stage}}", - "legendLink": null - } - ], - "title": "Series request average latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 27, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by(stage, le) (rate(cortex_bucket_store_series_request_stage_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])))\n", - "format": "time_series", - "legendFormat": "{{stage}}", - "legendLink": null - } - ], - "title": "Series request 99th percentile latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Series batch preloading efficiency\nThis panel shows the % of time reduced by preloading, for Series() requests which have been\nsplit to 2+ batches. 
If a Series() request is served within a single batch, then preloading\nis not triggered, and thus not counted in this measurement.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 28, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "# Clamping min to 0 because if preloading not useful at all, then the actual value we get is\n# slightly negative because of the small overhead introduced by preloading.\nclamp_min(1 - (\n sum(rate(cortex_bucket_store_series_batch_preloading_wait_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\n sum(rate(cortex_bucket_store_series_batch_preloading_load_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n), 0)\n", - "format": "time_series", - "legendFormat": "% of time reduced by preloading", - "legendLink": null - } - ], - "title": "Series batch preloading efficiency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Blocks currently owned\nThis panel shows the number of blocks owned by each store-gateway replica.\nFor each owned block, the store-gateway keeps its index-header on disk, and\neventually loaded in memory (if index-header lazy loading is disabled, or lazy loading\nis 
enabled and the index-header was loaded).\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 29, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "cortex_bucket_store_blocks_loaded{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Blocks currently owned", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 30, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_bucket_store_block_loads_total{component=\"store-gateway\",cluster=~\"$cluster\", 
job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) - sum(rate(cortex_bucket_store_block_load_failures_total{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_bucket_store_block_load_failures_total{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Blocks loaded / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 31, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_bucket_store_block_drops_total{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) - sum(rate(cortex_bucket_store_block_drop_failures_total{component=\"store-gateway\",cluster=~\"$cluster\", 
job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_bucket_store_block_drop_failures_total{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Blocks dropped / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 32, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "cortex_bucket_store_indexheader_lazy_load_total{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"} - cortex_bucket_store_indexheader_lazy_unload_total{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Lazy loaded index-headers", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - 
"mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 33, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Index-header lazy load duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Index-header lazy load gate latency\nTime spent waiting for a turn to load an index header. 
This time is not included in \"Index-header lazy load duration.\"\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 34, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_bucket_stores_gate_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_bucket_stores_gate_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_bucket_stores_gate_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_bucket_stores_gate_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Index-header lazy load gate latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": 
"short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 35, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_bucket_store_series_hash_cache_hits_total{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum(rate(cortex_bucket_store_series_hash_cache_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "hit ratio", - "legendLink": null - } - ], - "title": "Series hash cache hit ratio", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 36, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": 
"sum(rate(thanos_store_index_cache_hits_total{item_type=\"ExpandedPostings\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum(rate(thanos_store_index_cache_requests_total{item_type=\"ExpandedPostings\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "hit ratio", - "legendLink": null - } - ], - "title": "ExpandedPostings cache hit ratio", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 37, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_cache_memory_hits_total{name=\"chunks-attributes-cache\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum(rate(cortex_cache_memory_requests_total{name=\"chunks-attributes-cache\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "hit ratio", - "legendLink": null - } - ], - "title": "Chunks attributes in-memory cache hit ratio", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": 
"default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Queries", - "uid": "b3abe8d5c040395cc36615cb4334c92d", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-queries.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-reads-networking.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - 
"links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": 
true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Summary", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": 
"{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"})", - "format": "time_series", - "legendFormat": "highest", - 
"legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": 
"Bps" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - 
"span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler", - 
"titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - 
"fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": 
"min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) 
(rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - 
"expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Store-gateway", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": 
"never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": 
"never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ruler", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - 
"allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Reads networking", - "uid": "54b2a0a4748b3bd1aefa92ce5559a1c2", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-reads-networking.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-reads-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - 
"steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": 
"multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Summary", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - 
"legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - 
"legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - 
"matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - 
"overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - 
"showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - 
"legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - 
"expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - 
"showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": 
"min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (RSS)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"ingester\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Rules", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", 
- "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ruler", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - 
"unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - 
"showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - 
{ - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Store-gateway", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - 
"unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (RSS)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - 
}, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"store-gateway\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 26, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"store-gateway\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk reads", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { 
- "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 27, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(store-gateway).*\"\n }\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": 
false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Reads resources", - "uid": "cc86fd5aa9301c6528986572ad974db9", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-reads-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-reads.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "175px", - "panels": [ - { - "content": "

\n This dashboard shows health metrics for the read path.\n It is broken into sections for each service on the read path, and organized by the order in which the read request flows.\n
\n Incoming queries travel from the gateway → query frontend → query scheduler → querier → ingester and/or store-gateway (depending on the time range of the query).\n
\n For each service, there are 3 panels showing (1) requests per second to that service, (2) average, median, and p99 latency of requests to that service, and (3) p99 latency of requests to each instance of that service.\n

\n

\n The dashboard also shows metrics for the 4 optional caches that can be deployed:\n the query results cache, the metadata cache, the chunks cache, and the index cache.\n
\n These panels will show “no data” if the caches are not deployed.\n

\n

\n Lastly, it also includes metrics for how the ingester and store-gateway interact with object storage.\n

\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 12, - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Reads dashboard description", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "100px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Instant queries per second\nRate of instant queries per second being made to the system.\nIncludes both queries made to the /prometheus API as\nwell as queries from the ruler.\n\n", - "fill": 1, - "format": "reqps", - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(\n rate(\n cortex_request_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",\n route=~\"(prometheus|api_prom)_api_v1_query\"\n }[$__rate_interval]\n )\n or\n rate(\n cortex_prometheus_rule_evaluations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Instant queries / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - 
"label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Range queries per second\nRate of range queries per second being made to\nMimir via the /prometheus API.\n\n", - "fill": 1, - "format": "reqps", - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_query_range\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Range queries / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### \"Label names\" queries per second\nRate of \"label names\" endpoint queries per second being made to\nMimir 
via the /prometheus API.\n\n", - "fill": 1, - "format": "reqps", - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_labels\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Label names queries / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### \"Label values\" queries per second\nRate of specific \"label values\" endpoint queries per second being made to\nMimir via the /prometheus API.\n\n", - "fill": 1, - "format": "reqps", - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - 
"seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_label_name_values\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Label values queries / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Series queries per second\nRate of series queries per second being made to\nMimir via the /prometheus API.\n\n", - "fill": 1, - "format": "reqps", - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_series\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": 
"70,80", - "timeFrom": null, - "timeShift": null, - "title": "Series queries / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - 
"matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { 
- "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Requests / sec\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 10, - "links": [ ], - "options": { - 
"legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Latency (Time in Queue)\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (Time in Queue)", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Queue length\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "queries" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "sum(min_over_time(cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__interval]))", - "format": "time_series", - "legendFormat": "Queue length", - "legendLink": null - } - ], - "title": "Queue length", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### 99th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "99th Percentile: {{ additional_queue_dimensions }}", - "refId": "A" - } - ], - "title": "99th Percentile Latency by Queue Dimension", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### 50th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "50th Percentile: {{ additional_queue_dimensions }}", - "refId": "A" - } - ], - "title": "50th Percentile Latency by Queue Dimension", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Average Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (additional_queue_dimensions) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (additional_queue_dimensions), \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "Average: {{ additional_queue_dimensions }}", - "refId": "C" - } - ], - "title": "Average Latency by Queue Dimension", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler Latency (Time in Queue) Breakout by Additional Queue Dimensions", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - 
"tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum (\n rate(thanos_memcached_operations_total{name=\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{name=\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "Requests/s", - "legendLink": null - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Cache – query results", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": 
"absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_querier_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / 
sum(cluster_job_route:cortex_querier_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_querier_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 
* sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - }, - { - 
"collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": 
"fixed" - } - } - ] - } - ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",route=~\"/gatewaypb.StoreGateway/.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - 
"refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 26, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Store-gateway", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - 
"stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 27, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n 
label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 28, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval])) * 1e3 / 
sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Store-gateway – key-value store for store-gateways ring", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 29, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(operation) (\n # Backwards compatibility\n rate(\n thanos_memcached_operations_total{\n component=\"store-gateway\",\n name=\"index-cache\",\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n or ignoring(backend)\n rate(\n thanos_cache_operations_total{\n component=\"store-gateway\",\n name=\"index-cache\",\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": 
"Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 30, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - 
"intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (getmulti)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Hit ratio\nEven if you do not set up memcached for the blocks index cache, you will still see data in this panel because the store-gateway by default has an\nin-memory blocks index cache.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 31, - 
"links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(item_type) (\n rate(\n thanos_store_index_cache_hits_total{\n component=\"store-gateway\",\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n/\nsum by(item_type) (\n rate(\n thanos_store_index_cache_requests_total{\n component=\"store-gateway\",\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n", - "format": "time_series", - "legendFormat": "{{item_type}}", - "legendLink": null - } - ], - "title": "Hit ratio", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memcached – block index cache (store-gateway accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 32, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(operation) (\n # Backwards compatibility\n rate(thanos_memcached_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{\n cluster=~\"$cluster\", 
job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 33, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n 
cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (getmulti)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - 
}, - "overrides": [ ] - }, - "id": 34, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_cache_memcached_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n)\n/\nsum(\n # Backwards compatibility\n rate(thanos_cache_memcached_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "items", - "legendLink": null - } - ], - "title": "Hit ratio", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memcached – chunks cache (store-gateway accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 35, - "links": [ ], - "options": { - "legend": { - "showLegend": 
true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(operation) (\n # Backwards compatibility\n rate(thanos_memcached_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 36, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - 
"intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n)\n", - "format": "time_series", - 
"intervalFactor": 2, - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (getmulti)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 37, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_cache_memcached_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n/\nsum(\n # Backwards compatibility\n rate(thanos_cache_memcached_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "items", - "legendLink": null - } - ], - "title": "Hit ratio", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memcached – metadata cache (store-gateway accesses)", - "titleSize": "h6" - }, - { - 
"collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 38, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(operation) (\n # Backwards compatibility\n rate(thanos_memcached_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 39, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", 
job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n 
component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (getmulti)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 40, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_cache_memcached_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n/\nsum(\n # Backwards compatibility\n rate(thanos_cache_memcached_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - 
"legendFormat": "items", - "legendLink": null - } - ], - "title": "Hit ratio", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memcached – metadata cache (querier accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 41, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 42, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - 
], - "title": "Error rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 43, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - 
"label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 44, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": 
"short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Blocks object store (store-gateway accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 45, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - 
"legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Get", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 46, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval]))", - "format": 
"time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: GetRange", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 47, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval]))", - 
"format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Upload", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 48, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 49, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 50, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"querier\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Error rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 51, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 52, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Blocks object store (querier accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 53, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Get", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 54, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: GetRange", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 55, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Upload", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 56, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - 
"regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Reads", - "uid": "e327503188913dc38ad571c647eef643", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-reads.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-remote-ruler-reads-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": 
"dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": 
"custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"ruler-query-frontend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend (dedicated to ruler)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"} / 
container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler (dedicated to ruler)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": 
"request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": 
"desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier (dedicated to ruler)", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - 
}, - "timezone": "utc", - "title": "Mimir / Remote ruler reads resources", - "uid": "1940f6ef765a506a171faa2056c956c3", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-remote-ruler-reads-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-remote-ruler-reads.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "175px", - "panels": [ - { - "content": "

\n This dashboard shows health metrics for the ruler read path when remote operational mode is enabled.\n It is broken into sections for each service on the ruler read path, and organized by the order in which the read request flows.\n
\n For each service, there are three panels showing (1) requests per second to that service, (2) average, median, and p99 latency of requests to that service, and (3) p99 latency of requests to each instance of that service.\n

\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 12, - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Remote ruler reads dashboard description", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "100px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Evaluations per second\nRate of rule expressions evaluated per second.\n\n", - "fill": 1, - "format": "reqps", - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(\n rate(\n cortex_request_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\",\n route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"\n }[$__rate_interval]\n )\n)\n", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Evaluations / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": 
false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": 
"color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th 
percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend (dedicated to ruler)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Requests / sec\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 6, - "links": [ ], - "options": { - 
"legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Latency (Time in Queue)\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (Time in Queue)", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Queue length\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "queries" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "sum(min_over_time(cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__interval]))", - "format": "time_series", - "legendFormat": "Queue length", - "legendLink": null - } - ], - "title": "Queue length", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler (dedicated to ruler)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### 99th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "99th Percentile: {{ additional_queue_dimensions }}", - "refId": "A" - } - ], - "title": "99th Percentile Latency by Queue Dimension", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### 50th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "50th Percentile: {{ additional_queue_dimensions }}", - "refId": "A" - } - ], - "title": "50th Percentile Latency by Queue Dimension", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Average Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (additional_queue_dimensions) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (additional_queue_dimensions), \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "Average: {{ additional_queue_dimensions }}", - "refId": "C" - } - ], - "title": "Average Latency by Queue Dimension", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler Latency (Time in Queue) Breakout by Additional Queue Dimensions", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - 
"value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", 
\"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_querier_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / sum(cluster_job_route:cortex_querier_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - 
"fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_querier_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier (dedicated to ruler)", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - 
"datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Remote ruler reads", - "uid": "f103238f7f5ab2f1345ce650cbfbfe2f", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-remote-ruler-reads.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-rollout-progress.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "fillOpacity": 80, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineWidth": 1, - "scaleDistribution": { - "type": "linear" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": 
[ ], - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Ready" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Updated" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "blue", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 13, - "w": 10, - "x": 0, - "y": 0 - }, - "id": 1, - "links": [ ], - "options": { - "barRadius": 0, - "barWidth": 0.96999999999999997, - "fullHighlight": false, - "groupWidth": 0.69999999999999996, - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "orientation": "horizontal", - "showValue": "auto", - "stacking": "none", - "tooltip": { - "mode": "multi", - "sort": "none" - }, - "xField": "Workload", - "xTickLabelRotation": 0, - "xTickLabelSpacing": 0 - }, - "targets": [ - { - "expr": "(\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas_updated{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas_updated{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n /\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n) and (\n sum by (workload) (\n label_replace(label_replace(label_replace(\n 
kube_deployment_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n > 0\n)\n", - "format": "table", - "instant": true, - "intervalFactor": null, - "legendFormat": "__auto", - "legendLink": null, - "step": null - }, - { - "expr": "(\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas_ready{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas_ready{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n /\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n) and (\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n > 0\n)\n", - "format": "table", - "instant": true, - "intervalFactor": null, - "legendFormat": "__auto", - "legendLink": null, - "step": null - } - ], - "title": "Rollout progress", - "transformations": [ - { - "id": "joinByField", 
- "options": { - "byField": "workload", - "mode": "outer" - } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time 1": true, - "Time 2": true - }, - "renameByName": { - "Value #A": "Updated", - "Value #B": "Ready", - "workload": "Workload" - } - } - }, - { - "id": "sortBy", - "options": { - "sort": [ - { - "field": "Workload" - } - ] - } - } - ], - "type": "barchart" - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 10, - "y": 0 - }, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Writes - 2xx", - "tooltip": { - 
"shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 0.20000000000000001 - }, - { - "color": "red", - "value": 0.5 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 12, - "y": 0 - }, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - 
"step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Writes - 4xx", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 0.01 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 14, - "y": 0 - }, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - 
"intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Writes - 5xx", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 0.20000000000000001 - }, - { - "color": "red", - "value": 0.5 - } - ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 8, - "x": 16, - "y": 0 - }, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - 
"step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Writes 99th latency", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 10, - "y": 4 - }, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": 
"", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Reads - 2xx", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 0.01 - }, - { - "color": "red", - "value": 0.050000000000000003 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 12, - "y": 4 - }, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", 
route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Reads - 4xx", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 0.01 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 14, - "y": 4 - }, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Reads - 5xx", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 1 - }, - { - "color": "red", - "value": 2.5 - } - ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 8, - "x": 16, - "y": 4 - }, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", 
route=~\"(prometheus|api_prom)_api_v1_.+\"}))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Reads 99th latency", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 0, - "noValue": "All healthy", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 1 - }, - { - "color": "red", - "value": 2 - } - ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 3, - "w": 10, - "x": 0, - "y": 13 - }, - "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "text": { - "titleSize": 14, - "valueSize": 14 - }, - "textMode": "value_and_name" - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "kube_deployment_status_replicas_unavailable{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n> 0\n", - "format": null, - "instant": true, - "interval": "", - 
"intervalFactor": null, - "legendFormat": "{{deployment}}", - "legendLink": null, - "step": null - }, - { - "expr": "kube_statefulset_status_replicas_current{cluster=~\"$cluster\", namespace=~\"$namespace\"} -\nkube_statefulset_status_replicas_ready {cluster=~\"$cluster\", namespace=~\"$namespace\"}\n> 0\n", - "format": null, - "instant": true, - "interval": "", - "intervalFactor": null, - "legendFormat": "{{statefulset}}", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Unhealthy pods", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "r.*" - }, - "properties": [ - { - "id": "custom.align", - "value": "center" - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 6, - "x": 10, - "y": 8 - }, - "id": 11, - "targets": [ - { - "expr": "count by(container, version) (\n label_replace(\n kube_pod_container_info{cluster=~\"$cluster\", namespace=~\"$namespace\"},\n \"version\", \"$1\", \"image\", \".*:(.*)\"\n )\n)\n", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "title": "Pods count per version", - "transformations": [ - { - "id": "labelsToFields", - "options": { - "valueLabel": "version" - } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true - }, - "indexByName": { - "Time": 0, - "container": 1 - } - } - }, - { - "id": "sortBy", - "options": { - "fields": { }, - "sort": [ - { - "field": "container" - } - ] - } - } - ], - "type": "table" - }, - { - 
"datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 10 - }, - "unit": "percentunit" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 8 - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}))[1h:])\n)\n", - "format": "time_series", - "legendFormat": "writes", - "legendLink": null - }, - { - "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}))[1h:])\n)\n", - "format": "time_series", - "legendFormat": "reads", - "legendLink": null - } - ], - "title": "Latency vs 24h ago", - "type": "timeseries" - } - ], - "refresh": "10s", - "rows": null, - "schemaVersion": 27, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - 
"options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Rollout progress", - "uid": "7f0b5567d543a1698e695b530eb7f5de", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-rollout-progress.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-ruler.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ 
- "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "100px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cortex_ruler_managers_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Active configurations", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - 
"seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Total rules", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Read from ingesters - QPS\nNote: Even while operating in Remote ruler mode you will still see values for this panel.\n\nThis is because the metrics are inclusive of intermediate services and are showing the requests that ultimately reach the ingesters.\n\nFor a more detailed view of the read path when using remote ruler mode, see the Remote ruler reads dashboard.\n\n", - "fill": 1, - "format": "reqps", - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Read from ingesters - QPS", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "reqps", - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Write to ingesters - QPS", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - 
"label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "success", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - }, - 
{ - "expr": "sum(rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "missed", - "legendLink": null - } - ], - "title": "Evaluations per second", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum (rate(cortex_prometheus_rule_evaluation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum (rate(cortex_prometheus_rule_evaluation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "average", - "legendLink": null - } - ], - "title": "Latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Rule evaluations global", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - 
"matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", 
operation=\"/cortex.Ingester/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "QPS", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", 
operation=\"/cortex.Ingester/Push\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Writes (ingesters)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { 
- "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "QPS", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, 
sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Reads (ingesters)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - 
"mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n 
label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ruler - key-value store for rulers ring", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(cortex_querier_storegateway_instances_hit_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_instances_hit_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Number of store-gateways hit per query", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(cortex_querier_storegateway_refetches_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_refetches_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Refetches of missing blocks per query", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Consistency checks failed\nRate of queries that had to run with consistency checks and those checks failed. A failed consistency check means that some of at least one block which had to be queried wasn't present in any of the store-gateways.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Failures / sec" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_querier_blocks_consistency_checks_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) / 
sum(rate(cortex_querier_blocks_consistency_checks_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Failures / sec", - "legendLink": null - } - ], - "title": "Consistency checks failed", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ruler - blocks storage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(user) (rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum by(user) (rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]) > 0)\n> 0\n", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Delivery errors", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": 
"percentunit" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(user) (rate(cortex_prometheus_notifications_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum by(user) (rate(cortex_prometheus_notifications_queue_capacity{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0\n", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Queue length", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (user) (increase(cortex_prometheus_notifications_dropped_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0\n", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Dropped", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Notifications", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - 
"pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(user) (rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Missed iterations", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "rate(cortex_prometheus_rule_group_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])\n /\nrate(cortex_prometheus_rule_group_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])\n", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": 
false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(rule_group) (rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "{{ rule_group }}", - "legendLink": null - } - ], - "title": "Failures", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Group evaluations", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "sum by(user) (rate(cortex_prometheus_rule_evaluation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum by(user) (rate(cortex_prometheus_rule_evaluation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Latency", - "type": "timeseries" - } - ], - 
"repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Rule evaluation per user", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Error rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - 
"custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - 
"fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 26, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - 
"repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ruler configuration object store (ruler accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 27, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Get", - "type": "timeseries", - "yaxes": [ - { - 
"format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 28, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: GetRange", - "type": 
"timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 29, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Upload", - 
"type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 30, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: 
Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Ruler", - 
"uid": "631e15d5d85afb2ca8e35d62984eeaa0", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-ruler.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-scaling.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "200px", - "panels": [ - { - "id": 1, - "options": { - "content": "This dashboard identifies scaling-related issues by suggesting services that you might want to scale up.\nThe table that follows contains a suggested number of replicas and the reason why.\nIf the system is failing and depending on the reason, try scaling up to the specified number.\nThe specified numbers are intended as helpful guidelines when things go wrong, rather than prescriptive guidelines.\n\nReasons:\n- **sample_rate**: There are not enough replicas to handle the\n sample rate. Applies to distributor and ingesters.\n- **active_series**: There are not enough replicas\n to handle the number of active series. Applies to ingesters.\n- **cpu_usage**: There are not enough replicas\n based on the CPU usage of the jobs vs the resource requests.\n Applies to all jobs.\n- **memory_usage**: There are not enough replicas based on the memory\n usage vs the resource requests. 
Applies to all jobs.\n- **active_series_limits**: There are not enough replicas to hold 60% of the\n sum of all the per tenant series limits.\n- **sample_rate_limits**: There are not enough replicas to handle 60% of the\n sum of all the per tenant rate limits.\n", - "mode": "markdown" - }, - "span": 12, - "title": "", - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Service scaling", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "400px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 0, - "desc": false - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "Required Replicas", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "Cluster", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "__name__", - "thresholds": [ ], - "type": "hidden", - "unit": "short" - }, - { - "alias": "Cluster", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - 
"link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "cluster", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "Service", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "deployment", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "Namespace", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "namespace", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "Reason", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "reason", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "sort_desc(\n cluster_namespace_deployment_reason:required_replicas:count{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n > ignoring(reason) group_left\n cluster_namespace_deployment:actual_replicas:count{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\n", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Workload-based scaling", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - 
"yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Scaling", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Scaling", - "uid": 
"64bbad83507b7289b514725658e10352", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-scaling.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-slow-queries.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | 
user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Response time", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Fetched series", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - 
"unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Fetched chunks", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": 
"quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Response size", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Time span", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "description": "### Query wall time\nSeconds per second spent by queriers evaluating queries.\nThis is roughly the product of the number of 
subqueries for a query and how long they took.\nIn increase in this metric means that queries take more resources from the query path to evaluate.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap query_wall_time_seconds [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap query_wall_time_seconds [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Query wall time", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Accross tenants", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, 
- "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 response time", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 fetched series", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, 
- "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 fetched chunks", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 response size", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - 
"pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 time span", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "description": "### Query wall time\nSeconds per second spent by queriers evaluating queries.\nThis is roughly the product of the number of subqueries for a query and how long they took.\nIn increase in this metric means that queries take more resources from the query path to evaluate.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | 
unwrap query_wall_time_seconds [$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 query wall time", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Top 10 tenants", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 response time", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, 
- "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 fetched series", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 fetched chunks", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { 
- "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 response size", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 time span", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "description": "### Query wall time\nSeconds per second spent by queriers evaluating queries.\nThis is roughly the product of the number of subqueries for a query and how long they took.\nIn increase in this metric means that queries take more resources from the query path to evaluate.\n\n", - "fieldConfig": { - 
"defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap query_wall_time_seconds [$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 query wall time", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Top 10 User-Agents", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "fetched_chunk_bytes" - }, - "properties": [ - { - "id": "unit", - "value": "bytes" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "fetched_index_bytes" - }, - "properties": [ - { - "id": "unit", - "value": "bytes" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "response_size_bytes" - }, - "properties": [ - { - "id": "unit", - "value": "bytes" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "results_cache_hit_bytes" - }, - "properties": [ - { - "id": "unit", - "value": "bytes" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "results_cache_miss_bytes" - }, - "properties": [ - { - "id": "unit", - "value": 
"bytes" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "estimated_series_count" - }, - "properties": [ - { - "id": "unit", - "value": "short" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "fetched_chunks_count" - }, - "properties": [ - { - "id": "unit", - "value": "short" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "fetched_series_count" - }, - "properties": [ - { - "id": "unit", - "value": "short" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Time span" - }, - "properties": [ - { - "id": "unit", - "value": "s" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Duration" - }, - "properties": [ - { - "id": "unit", - "value": "s" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Step" - }, - "properties": [ - { - "id": "unit", - "value": "s" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "queue_time_seconds" - }, - "properties": [ - { - "id": "unit", - "value": "s" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "query_wall_time_seconds" - }, - "properties": [ - { - "id": "unit", - "value": "s" - } - ] - } - ] - }, - "height": "500px", - "id": 19, - "span": 12, - "targets": [ - { - "expr": "{cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | label_format response_time_seconds=\"{{ if .response_time }} {{ duration .response_time }} {{ end }}\",param_step_seconds=\"{{ if .param_step }} {{ div .param_step 1000 }} {{ end }}\",length_seconds=\"{{ if .length }} {{ duration .length }} {{ end }}\"", - "instant": false, - "legendFormat": "", - "range": true, - "refId": "A" - } - ], - "title": "Slow queries", - "transformations": [ - { - "id": "extractFields", - "options": { - "source": "labels" - } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Line": true, - "Time": 
true, - "caller": true, - "cluster": true, - "component": true, - "container": true, - "gossip_ring_member": true, - "host": true, - "id": true, - "job": true, - "labels": true, - "length": true, - "level": true, - "line": true, - "method": true, - "msg": true, - "name": true, - "namespace": true, - "param_step": true, - "path": true, - "pod": true, - "pod_template_hash": true, - "response_time": true, - "stream": true, - "traceID": true, - "tsNs": true - }, - "indexByName": { - "err": 10, - "length_seconds": 3, - "param_end": 5, - "param_query": 8, - "param_start": 4, - "param_step_seconds": 7, - "param_time": 6, - "response_time_seconds": 9, - "status": 1, - "ts": 0, - "user": 2 - }, - "renameByName": { - "err": "Error", - "length_seconds": "Time span", - "param_end": "End", - "param_query": "Query", - "param_start": "Start", - "param_step_seconds": "Step", - "param_time": "Time (instant query)", - "response_time_seconds": "Duration", - "ts": "Completion date", - "user": "Tenant ID" - } - } - }, - { - "id": "convertFieldType", - "options": { - "conversions": [ - { - "destinationType": "number", - "targetField": "sharded_queries" - }, - { - "destinationType": "number", - "targetField": "split_queries" - }, - { - "destinationType": "number", - "targetField": "fetched_chunk_bytes" - }, - { - "destinationType": "number", - "targetField": "fetched_index_bytes" - }, - { - "destinationType": "number", - "targetField": "response_size_bytes" - }, - { - "destinationType": "number", - "targetField": "results_cache_hit_bytes" - }, - { - "destinationType": "number", - "targetField": "results_cache_miss_bytes" - }, - { - "destinationType": "number", - "targetField": "estimated_series_count" - }, - { - "destinationType": "number", - "targetField": "fetched_chunks_count" - }, - { - "destinationType": "number", - "targetField": "fetched_series_count" - }, - { - "destinationType": "number", - "targetField": "Time span" - }, - { - "destinationType": "number", - "targetField": 
"Duration" - }, - { - "destinationType": "number", - "targetField": "Step" - }, - { - "destinationType": "number", - "targetField": "queue_time_seconds" - }, - { - "destinationType": "number", - "targetField": "query_wall_time_seconds" - } - ] - } - } - ], - "type": "table" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "hide": 0, - "includeAll": false, - "label": "Loki data source", - "multi": false, - "name": "loki_datasource", - "query": "loki", - "type": "datasource" - }, - { - "current": { - "selected": true, - "text": "5s", - "value": "5s" - }, - "hide": 0, - "label": "Min duration", - "name": "min_duration", - "options": [ - { - "selected": true, - "text": "5s", - "value": 
"5s" - } - ], - "query": "5s", - "type": "textbox" - }, - { - "current": { - "selected": true, - "text": ".*", - "value": ".*" - }, - "hide": 0, - "label": "Tenant ID", - "name": "tenant_id", - "options": [ - { - "selected": true, - "text": ".*", - "value": ".*" - } - ], - "query": ".*", - "type": "textbox" - }, - { - "current": { - "selected": true, - "text": ".*", - "value": ".*" - }, - "hide": 0, - "label": "User-Agent HTTP Header", - "name": "user_agent", - "options": [ - { - "selected": true, - "text": ".*", - "value": ".*" - } - ], - "query": ".*", - "type": "textbox" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Slow queries", - "uid": "6089e1ce1e678788f46312a0a1e647e6", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-slow-queries.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-tenants.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "25px", - "panels": [ - { - "content": "

\n This dashboard shows various metrics detailed by tenant (user) selected above.\n

\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 12, - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Tenants dashboard description", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### All series\nNumber of active, in-memory, and owned series per user, and active series matching custom trackers (in parenthesis).\nNote that these counts include all series regardless of the type of data (counter, gauge, native histogram, etc.).\nNote that active series matching custom trackers are included in the total active series count.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(\n (\n cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n - cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "in-memory", - "legendLink": null - }, - { - "expr": "max(cortex_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"max_global_series_per_user\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"max_global_series_per_user\"})\n", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "sum(\n cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "active", - "legendLink": null - }, - { - "expr": "sum(\n cortex_ingester_owned_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "owned", - "legendLink": null - }, - { - "expr": "sum by (name) (\n cortex_ingester_active_series_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n) > 0\n", - "format": "time_series", - "legendFormat": "active ({{ name }})", - "legendLink": null - } - ], - "title": "All series", - 
"type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### In-memory series per ingester\nLocal tenant series limit and number of in-memory series per ingester.\nBecause series can be unevenly distributed across ingesters, ingesters may hit the local limit at different times.\nNote that in-memory series may exceed the local limit if limiting based on owned series is enabled.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/local limit .+/" - }, - "properties": [ - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - }, - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "min by (job) (cortex_ingester_local_limits{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", limit=\"max_global_series_per_user\", user=\"$user\"})\n", - "format": "time_series", - "legendFormat": "local limit ({{job}})", - "legendLink": null - }, - { - "expr": "cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n- cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "In-memory series per ingester", - "type": "timeseries" - }, - { - "datasource": 
"$datasource", - "description": "### Owned series per ingester\nLocal tenant series limit and number of owned series per ingester.\nBecause series can be unevenly distributed across ingesters, ingesters may hit the local limit at different times.\nOwned series are the subset of an ingester's in-memory series that currently map to it in the ring\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/local limit .+/" - }, - "properties": [ - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - }, - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "min by (job) (cortex_ingester_local_limits{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", limit=\"max_global_series_per_user\", user=\"$user\"})\n", - "format": "time_series", - "legendFormat": "local limit ({{job}})", - "legendLink": null - }, - { - "expr": "cortex_ingester_owned_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Owned series per ingester", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Tenant series counts", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - 
"description": "### Series with exemplars\nNumber of series with exemplars currently in storage.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "series", - "legendLink": null - } - ], - "title": "Series with exemplars", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Oldest exemplar age\nThe age of the oldest exemplar stored in circular storage.\nUseful to check for what time range the current exemplar buffer limit allows.\nThis usually means the max age for all exemplars for a typical setup.\nThis is not true though if one of the series timestamp is in future compared to rest series.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - 
"tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "time() - min(cortex_ingester_tsdb_exemplar_last_exemplars_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"} > 0)", - "format": "time_series", - "legendFormat": "age", - "legendLink": null - } - ], - "title": "Oldest exemplar age", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Native histogram series\nNumber of active native histogram series per user, and active native histogram series matching custom trackers (in parenthesis).\nNote that active series matching custom trackers are included in the total active series count.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n cortex_ingester_active_native_histogram_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "active", - "legendLink": null - }, - { - "expr": "sum by (name) (\n 
cortex_ingester_active_native_histogram_series_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n) > 0\n", - "format": "time_series", - "legendFormat": "active ({{ name }})", - "legendLink": null - } - ], - "title": "Native histogram series", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Total number of buckets used by native histogram series\nTotal number of buckets in active native histogram series per user, and total active native histogram buckets matching custom trackers (in parenthesis).\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n cortex_ingester_active_native_histogram_buckets{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "buckets", - 
"legendLink": null - }, - { - "expr": "sum by (name) (\n cortex_ingester_active_native_histogram_buckets_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n) > 0\n", - "format": "time_series", - "legendFormat": "buckets ({{ name }})", - "legendLink": null - } - ], - "title": "Total number of buckets used by native histogram series", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Exemplars and native histograms", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Distributor requests incoming rate\nThe rate of requests that have come in to the distributor, including rejected requests.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_requests_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Distributor requests incoming rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor requests 
received (accepted) rate\nThe rate of received requests, excluding rejected requests.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_received_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - }, - { - "expr": "max(cortex_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"request_rate\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"request_rate\"})\n", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "Distributor requests received (accepted) rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Newest seen sample age\nThe age of the newest received sample seen in the distributors.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - 
"thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "time() - max(cortex_distributor_latest_seen_sample_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"} > 0)", - "format": "time_series", - "legendFormat": "age", - "legendLink": null - } - ], - "title": "Newest seen sample age", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor discarded requests rate\nThe rate of each request's discarding reason.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_discarded_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{ reason }}", - "legendLink": null - } - ], - "title": "Distributor discarded requests rate", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor ingestion requests", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Distributor samples incoming rate\nThe rate of samples that 
have come in to the distributor, including rejected or deduped exemplars.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_samples_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Distributor samples incoming rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor samples received (accepted) rate\nThe rate of received samples, excluding rejected and deduped samples.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - }, - { - "expr": "max(cortex_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"ingestion_rate\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"ingestion_rate\"})\n", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "Distributor samples received (accepted) rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor deduplicated/non-HA\nThe rate of deduplicated samples and the rate of received samples for a user that has HA tracking turned on, but the sample didn't contain both HA labels.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_deduped_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "deduplicated", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_distributor_non_ha_samples_received_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "non-HA", - 
"legendLink": null - } - ], - "title": "Distributor deduplicated/non-HA", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor and ingester discarded samples rate\nThe rate of each sample's discarding reason.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{ reason }} (distributor)", - "legendLink": null - }, - { - "expr": "sum by (reason) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{ reason }} (ingester)", - "legendLink": null - } - ], - "title": "Distributor and ingester discarded samples rate", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Samples ingestion funnel", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Distributor exemplars incoming rate\nThe rate of exemplars that have come in to the distributor, including rejected or deduped exemplars.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - 
"pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_exemplars_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Distributor exemplars incoming rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor exemplars received (accepted) rate\nThe rate of received exemplars, excluding rejected and deduped exemplars.\nThis number can be sensibly lower than incoming rate because we dedupe the HA sent exemplars, and then reject based on time.\nSee discarded rate for reasons why exemplars are being discarded.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_received_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Distributor exemplars received 
(accepted) rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor discarded exemplars rate\nThe rate of each exmplars' discarding reason.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_discarded_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{ reason }}", - "legendLink": null - } - ], - "title": "Distributor discarded exemplars rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Ingester appended exemplars rate\nTotal number of exemplars appended in the ingesters.\nThis can be lower than ingested exemplars rate since TSDB does not append the same exemplar twice, and those can be frequent.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n rate(cortex_ingester_tsdb_exemplar_exemplars_appended_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval])\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Ingester appended exemplars rate", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Exemplars ingestion funnel", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Symbol table size for loaded blocks\nSize of symbol table in memory for loaded blocks, averaged by ingester.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (job) (cortex_ingester_tsdb_symbol_table_size_bytes{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "{{ job }}", - "legendLink": null - } - ], - "title": "Symbol table size for loaded blocks", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Space used by local blocks\nThe number of bytes that are currently used for local storage by all blocks.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 
1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (job) (cortex_ingester_tsdb_storage_blocks_bytes{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "{{ job }}", - "legendLink": null - } - ], - "title": "Space used by local blocks", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingesters' storage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Number of groups\nTotal number of rule groups for a tenant.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "count(sum by (rule_group) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", 
user=\"$user\"}))", - "format": "time_series", - "legendFormat": "groups", - "legendLink": null - }, - { - "expr": "max(cortex_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"ruler_max_rule_groups_per_tenant\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"ruler_max_rule_groups_per_tenant\"})\n", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "Number of groups", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Number of rules\nTotal number of rules for a tenant.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "rules", - "legendLink": null - } - ], - "title": "Number of rules", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "options": { - "legend": { - 
"showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Total evaluations rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 26, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (rule_group) (rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "{{ rule_group }}", - "legendLink": null - } - ], - "title": "Failed evaluations rate", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Rules", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 27, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - 
"points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "rules", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (rule_group) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit biggest groups", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 28, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - 
"nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "seconds", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (rule_group) (cortex_prometheus_rule_group_last_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit slowest groups (last evaluation)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Top rules", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": 
"$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 29, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Sent notifications rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "rate" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 30, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Failed notifications rate", - "type": "timeseries" - } - ], - "repeat": null, - 
"repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Notifications", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 31, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (user) (cortex_alertmanager_alerts{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "alerts", - "legendLink": null - }, - { - "expr": "sum by (user) (cortex_alertmanager_silences{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "silences", - "legendLink": null - } - ], - "title": "Alerts", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": 
"#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 32, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "(\nsum(rate(cortex_alertmanager_notifications_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))\n-\non() (sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) or on () vector(0))\n) > 0\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "NPS", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 33, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "(\nsum(rate(cortex_alertmanager_notifications_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration)\n-\n(sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", 
user=\"$user\"}[$__rate_interval])) by(integration) or\n (sum(rate(cortex_alertmanager_notifications_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration) * 0)\n)) > 0\n", - "format": "time_series", - "legendFormat": "success - {{ integration }}", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration)", - "format": "time_series", - "legendFormat": "failed - {{ integration }}", - "legendLink": null - } - ], - "title": "NPS by integration", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alertmanager", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 34, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_query_frontend_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Queries / Sec", - "legendLink": null - } - ], - "title": "Rate of Read Requests - query-frontend", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - 
"fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 35, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "Queue Length", - "legendLink": null - } - ], - "title": "Number of Queries Queued - query-scheduler", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Read Path - Queries (User)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 36, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_query_frontend_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Queries / Sec", - "legendLink": null - } - ], - "title": "Rate of Read Requests - ruler-query-frontend", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - 
"drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 37, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "Queue Length", - "legendLink": null - } - ], - "title": "Number of Queries Queued - ruler-query-scheduler", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Read Path - Queries (Ruler)", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Estimated Compaction Jobs\nEstimated number of compaction jobs for selected user, based on latest version of bucket index. When user sends data, ingesters upload new user blocks every 2 hours\n(shortly after 01:00 UTC, 03:00 UTC, 05:00 UTC, etc.), and compactors should process all of the blocks within 2h interval.\nIf this graph regularly goes to zero (or close to zero) in 2 hour intervals, then compaction for this user works correctly.\n\nDepending on the configuration, there are two types of jobs: `split` jobs and `merge` jobs. 
Split jobs will only show up when user is configured with positive number of `compactor_split_and_merge_shards`.\nValues for split and merge jobs are stacked.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 50, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 38, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (type) (cortex_bucket_index_estimated_compaction_jobs{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", user=\"$user\"})\nand ignoring(type)\n(sum(rate(cortex_bucket_index_estimated_compaction_jobs_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) == 0)\n", - "format": "time_series", - "legendFormat": "{{ job }}", - "legendLink": null - } - ], - "title": "Estimated Compaction Jobs", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Number of blocks\nNumber of blocks stored in long-term storage for this user.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 39, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by (user) (cortex_bucket_blocks_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", user=\"$user\"})\n", - "format": "time_series", - "legendFormat": "{{ job }}", - "legendLink": null - } - ], - "title": "Blocks", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Compactions", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "user", - "multi": false, - "name": "user", - "options": [ ], - "query": "label_values(cortex_ingester_active_series{cluster=~\"$cluster\", namespace=~\"$namespace\"}, user)", - "refresh": 1, - "regex": "", - "sort": 
1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "current": { - "selected": true, - "text": "10", - "value": "10" - }, - "hide": 0, - "includeAll": false, - "multi": false, - "name": "limit", - "options": [ - { - "selected": true, - "text": "10", - "value": "10" - }, - { - "selected": false, - "text": "50", - "value": "50" - }, - { - "selected": false, - "text": "100", - "value": "100" - }, - { - "selected": false, - "text": "500", - "value": "500" - }, - { - "selected": false, - "text": "1000", - "value": "1000" - } - ], - "type": "custom" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Tenants", - "uid": "35fa247ce651ba189debf33d7ae41611", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-tenants.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-top-tenants.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "25px", - "panels": [ - { - "content": "

\n This dashboard shows the top tenants based on multiple selection criterias.\n Rows are collapsed by default to avoid querying all of them.\n Use the templating variable \"limit\" above to select the amount of users to be shown.\n

\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 12, - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Top tenants dashboard description", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "series", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit,\n sum by (user) (\n cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n )\n)\n", - 
"format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by active series", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By active series", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "series", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - 
"pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} )\n)\n)", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by in-memory series (series created - series removed)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By in-memory series", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - 
}, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} )\n)\n\nand\ntopk($limit, sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ end())\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ end())\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} @ end())\n)\n - sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ start())\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ start())\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} @ start())\n)\n)\n", - 
"format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Top $limit users by in-memory series (series created - series removed) that grew the most between query range start and query range end", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By in-memory series growth", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "samples/s", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[5m])))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - 
"thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by received samples rate in last 5m", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By samples rate", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\nand\ntopk($limit,\n sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ end()))\n -\n sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ start()))\n)\n", - "format": "time_series", - "legendFormat": "{{ user }}", 
- "legendLink": null - } - ], - "title": "Top $limit users by received samples rate that grew the most between query range start and query range end", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By samples rate growth", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "samples/s", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[5m])))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - 
"title": "Top $limit users by discarded samples rate in last 5m", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By discarded samples rate", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\nand\ntopk($limit,\n sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ end()))\n -\n sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ start()))\n)\n", - "format": "time_series", - 
"legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Top $limit users by discarded samples rate that grew the most between query range start and query range end", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By discarded samples rate growth", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "series", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit,\n sum by (user) (\n cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n )\n)\n", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by series with exemplars", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By series with exemplars", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "exemplars/s", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": 
"", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (user) (rate(cortex_distributor_received_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[5m])))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by received exemplars rate in last 5m", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By exemplars rate", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 11, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 3, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - 
{ - "alias": "rules", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (rule_group, user) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit biggest groups", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By rule group size", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 12, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": 
"flot", - "seriesOverrides": [ ], - "sort": { - "col": 3, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "seconds", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (rule_group, user) (cortex_prometheus_rule_group_last_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit slowest groups (last evaluation)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By rule group evaluation time", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - 
"fill": 1, - "id": 13, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "Compaction Jobs", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit,\n sum by (user) (cortex_bucket_index_estimated_compaction_jobs{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"})\n and ignoring(user)\n (sum(rate(cortex_bucket_index_estimated_compaction_jobs_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) == 0)\n)\n", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by estimated compaction jobs from bucket-index", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - 
"format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By estimated compaction jobs from bucket-index", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "current": { - "selected": true, - "text": "10", - "value": "10" - }, - "hide": 0, - "includeAll": false, - "multi": false, - "name": "limit", - "options": [ - { - "selected": true, - "text": "10", - "value": "10" - }, - { - "selected": false, - "text": "50", - "value": "50" - }, - { - "selected": false, - "text": "100", - "value": "100" - } - 
], - "type": "custom" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Top tenants", - "uid": "bc6e12d4fe540e4a1785b9d3ca0ffdd9", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-top-tenants.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-writes-networking.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - 
"expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Summary", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - 
"drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, 
- "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"})", - "format": "time_series", - "legendFormat": 
"limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": 
null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum 
by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", 
- "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Writes networking", - "uid": "978c1cb452585c96697a238eaac7fe2d", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-writes-networking.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-writes-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { 
- "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Summary", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - 
"fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", 
- "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - 
"spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (cortex_ingester_memory_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "In-memory series", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - 
"thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - 
"drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (RSS)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, 
- "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": 
"absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"ingester\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - 
"lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"ingester\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk reads", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(ingester).*\"\n 
}\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Writes resources", - "uid": "bc9160e50b52e89e0e49c840fea3d379", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - 
grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-writes-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-writes.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "125px", - "panels": [ - { - "content": "

\n This dashboard shows various health metrics for the write path.\n It is broken into sections for each service on the write path,\n and organized by the order in which the write request flows.\n
\n Incoming metrics data travels from the gateway → distributor → ingester.\n
\n For each service, there are 3 panels showing\n (1) requests per second to that service,\n (2) average, median, and p99 latency of requests to that service, and\n (3) p99 latency of requests to each instance of that service.\n

\n

\n It also includes metrics for the key-value (KV) stores used to manage\n the high-availability tracker and the ingesters.\n

\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 12, - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Writes dashboard description", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "100px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_samples:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Samples / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Exemplars / sec\nThe total number of received exemplars by the distributors, excluding rejected and deduped exemplars, 
but not necessarily ingested by the ingesters.\n\n", - "fill": 1, - "format": "short", - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Exemplars / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### In-memory series\nThe number of series not yet flushed to object storage that are held in ingester memory.\n\n", - "fill": 1, - "format": "short", - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - 
"steppedLine": false, - "targets": [ - { - "expr": "sum(cortex_ingester_memory_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n/ on(cluster, namespace) group_left\nmax by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}))\n", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "In-memory series", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Exemplars in ingesters\nNumber of TSDB exemplars currently in ingesters' storage.\n\n", - "fill": 1, - "format": "short", - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cortex_ingester_tsdb_exemplar_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n/ on(cluster, namespace) group_left\nmax by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}))\n", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Exemplars in ingesters", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "count(count by(user) (cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Tenants", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 
null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Requests / sec\nThe rate of successful, failed and rejected requests to distributor.\nRejected requests are requests that distributor fails to handle because of distributor instance limits.\nWhen distributor is configured to use \"early\" request rejection, then rejected requests are NOT included in other metrics.\nWhen distributor is not configured to use \"early\" request rejection, then rejected requests are also counted as \"errors\".\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": 
"byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 
4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": 
"desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Requests / sec\nThe rate of successful, failed and rejected requests to ingester.\nRejected requests are requests that ingester fails to handle because of ingester instance limits (ingester-max-inflight-push-requests, ingester-max-inflight-push-requests-bytes, ingester-max-ingestion-rate).\nWhen ingester is configured to use \"early\" request rejection, then rejected requests are NOT included in other metrics.\nWhen ingester is not configured to use \"early\" request rejection, then rejected requests are also counted as \"errors\".\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } 
- } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",route=\"/cortex.Ingester/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - 
"defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - 
} - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } 
- } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 14, - 
"links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor - key-value store for high-availability (HA) deduplication", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - 
"drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": 
"single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor - key-value store for distributors ring", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - 
"mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 18, - 
"links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester - key-value store for the ingesters ring", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Uploaded blocks / sec\nThe rate of blocks being uploaded from the ingesters\nto object storage.\n\n", - 
"fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_shipper_uploads_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) - sum(rate(cortex_ingester_shipper_upload_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_ingester_shipper_upload_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Uploaded blocks / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Upload latency\nThe average, median (50th percentile), and 99th percentile time\nthe ingesters take to upload blocks to object storage.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - 
"spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Upload latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - 
"repeatRowId": null, - "showTitle": true, - "title": "Ingester - shipper", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Compactions per second\nIngesters maintain a local TSDB per-tenant on disk. Each TSDB maintains a head block for each\nactive time series; these blocks get periodically compacted (by default, every 2h).\nThis panel shows the rate of compaction operations across all TSDBs on all ingesters.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_tsdb_compactions_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_ingester_tsdb_compactions_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Compactions / sec", - "type": "timeseries" - }, - { - 
"datasource": "$datasource", - "description": "### Compaction latency\nThe average, median (50th percentile), and 99th percentile time ingesters take to compact TSDB head blocks\non the local filesystem.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Compactions latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - 
"show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester - TSDB head", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### WAL truncations per second\nThe WAL is truncated each time a new TSDB block is written. This panel measures the rate of\ntruncations.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_tsdb_wal_truncations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) - sum(rate(cortex_ingester_tsdb_wal_truncations_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_ingester_tsdb_wal_truncations_failed_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "WAL truncations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Checkpoints created per second\nCheckpoints are created as part of the WAL truncation process.\nThis metric measures the rate of checkpoint creation.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_tsdb_checkpoint_creations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) - sum(rate(cortex_ingester_tsdb_checkpoint_creations_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_ingester_tsdb_checkpoint_creations_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - 
"legendFormat": "failed", - "legendLink": null - } - ], - "title": "Checkpoints created / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### WAL truncations latency (including checkpointing)\nAverage time taken to perform a full WAL truncation,\nincluding the time taken for the checkpointing to complete.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_tsdb_wal_truncate_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\n/\nsum(rate(cortex_ingester_tsdb_wal_truncate_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) >= 0\n", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - } - ], - "title": "WAL truncations latency (includes checkpointing)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "WAL" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } 
- } - ] - }, - { - "matcher": { - "id": "byName", - "options": "mmap-ed chunks" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E28A42", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 26, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_tsdb_wal_corruptions_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "WAL", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_ingester_tsdb_mmap_chunk_corruptions_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "mmap-ed chunks", - "legendLink": null - } - ], - "title": "Corruptions / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester - TSDB write ahead log (WAL)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Distributor exemplars incoming rate\nThe rate of exemplars that have come in to the distributor, including rejected or deduped exemplars.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ex/s" - }, - "overrides": [ ] - }, - "id": 27, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": 
"sum(cluster_namespace_job:cortex_distributor_exemplars_in:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "incoming exemplars", - "legendLink": null - } - ], - "title": "Distributor exemplars incoming rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor exemplars received rate\nThe rate of received exemplars, excluding rejected and deduped exemplars.\nThis number can be sensibly lower than incoming rate because we dedupe the HA sent exemplars, and then reject based on time, see `cortex_discarded_exemplars_total` for specific reasons rates.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ex/s" - }, - "overrides": [ ] - }, - "id": 28, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "received exemplars", - "legendLink": null - } - ], - "title": "Distributor exemplars received rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Ingester ingested exemplars rate\nThe rate of exemplars ingested in the ingesters.\nEvery exemplar is sent to the replication factor number of ingesters, so the sum of rates from all ingesters is divided by the replication factor.\nThis ingested exemplars rate should match the distributor's received exemplars rate.\n\n", - "fieldConfig": { - "defaults": { - 
"custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ex/s" - }, - "overrides": [ ] - }, - "id": 29, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n cluster_namespace_job:cortex_ingester_ingested_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "ingested exemplars", - "legendLink": null - } - ], - "title": "Ingester ingested exemplars rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Ingester appended exemplars rate\nThe rate of exemplars appended in the ingesters.\nThis can be lower than ingested exemplars rate since TSDB does not append the same exemplar twice, and those can be frequent.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ex/s" - }, - "overrides": [ ] - }, - "id": 30, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n cluster_namespace_job:cortex_ingester_tsdb_exemplar_exemplars_appended:rate5m{cluster=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "appended exemplars", - "legendLink": null - } - ], - "title": "Ingester appended exemplars rate", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Exemplars", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 31, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_distributor_instance_rejected_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{reason}}", - "legendLink": null - } - ], - "title": "Rejected distributor requests", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 32, - "links": [ ], - "options": { - 
"legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_ingester_instance_rejected_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{reason}}", - "legendLink": null - } - ], - "title": "Rejected ingester requests", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Instance Limits", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - 
"10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Writes", - "uid": "8280707b8f16e7b87b840fc1cc92d4c5", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-writes.json - namespace: monitoring-system ---- -apiVersion: v1 data: tempo.yaml: | # For more information on this configuration, see the complete reference guide at @@ -43786,6 +1019,36 @@ metadata: namespace: tracing-system --- apiVersion: v1 +data: + memcached-address: bWVtY2FjaGVkLm1lbWNhY2hlZC1zeXN0ZW0uc3ZjLmNsdXN0ZXIubG9jYWw6MTEyMTE= +kind: Secret +metadata: + name: integrations-memcached + namespace: monitoring-system +type: Opaque +--- +apiVersion: v1 +data: + mysql-host: bXlzcWwubXlzcWwtc3lzdGVtLnN2Yy5jbHVzdGVyLmxvY2Fs + mysql-password: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= + mysql-username: bGd0bXA= +kind: Secret +metadata: + name: integrations-mysql + namespace: monitoring-system +type: Opaque +--- +apiVersion: v1 +data: + redis-addr: cmVkaXMtbWFzdGVyLnJlZGlzLXN5c3RlbS5zdmMuY2x1c3Rlci5sb2NhbDo2Mzc5 + redis-password: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= +kind: Secret +metadata: + name: integrations-redis + namespace: monitoring-system +type: Opaque +--- +apiVersion: v1 data: MIMIR_S3_SECRET_ACCESS_KEY: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= kind: Secret @@ -43801,18 +1064,96 @@ metadata: type: Opaque --- apiVersion: v1 -data: - JAEGER_AGENT_HOST: Z3JhZmFuYS1hZ2VudC5tb25pdG9yaW5nLXN5c3RlbS5zdmMuY2x1c3Rlci5sb2NhbA== - JAEGER_AGENT_PORT: NjgzMQ== - JAEGER_SAMPLER_PARAM: MQ== - JAEGER_SAMPLER_TYPE: Y29uc3Q= - JAEGER_TAGS: YXBwPXRlbXBv - TEMPO_S3_SECRET_KEY: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= -kind: Secret +data: + JAEGER_AGENT_HOST: Z3JhZmFuYS1hZ2VudC5tb25pdG9yaW5nLXN5c3RlbS5zdmMuY2x1c3Rlci5sb2NhbA== + 
JAEGER_AGENT_PORT: NjgzMQ== + JAEGER_SAMPLER_PARAM: MQ== + JAEGER_SAMPLER_TYPE: Y29uc3Q= + JAEGER_TAGS: YXBwPXRlbXBv + TEMPO_S3_SECRET_KEY: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= +kind: Secret +metadata: + name: tempo-distributed-env-gk54k88t7g + namespace: tracing-system +type: Opaque +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +spec: + internalTrafficPolicy: Cluster + ports: + - name: http-metrics + port: 80 + protocol: TCP + targetPort: 80 + - name: grpc-otlp + port: 4317 + protocol: TCP + targetPort: 4317 + - name: http-otlp + port: 4318 + protocol: TCP + targetPort: 4318 + - name: zipkin + port: 9411 + protocol: TCP + targetPort: 9411 + - name: jaeger-compact + port: 6831 + protocol: UDP + targetPort: 6831 + selector: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + type: ClusterIP +--- +apiVersion: v1 +kind: Service metadata: - name: tempo-distributed-env-gk54k88t7g - namespace: tracing-system -type: Opaque + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent-cluster + namespace: monitoring-system +spec: + clusterIP: None + ports: + - name: http + port: 80 + protocol: TCP + targetPort: 80 + - name: grpc-otlp + port: 4317 + protocol: TCP + targetPort: 4317 + - name: http-otlp + port: 4318 + protocol: TCP + targetPort: 4318 + - name: zipkin + port: 9411 + protocol: TCP + targetPort: 9411 + - name: jaeger-compact + port: 6831 + protocol: UDP + targetPort: 6831 + selector: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + type: ClusterIP --- apiVersion: v1 
kind: Service @@ -45068,1595 +2409,148 @@ spec: app.kubernetes.io/instance: tempo-distributed app.kubernetes.io/name: tempo --- -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: agent-flow-mixin-alerts - namespace: monitoring-system -spec: - groups: - - name: clustering - rules: - - alert: ClusterNotConverging - annotations: - message: Cluster is not converging. - expr: stddev by (cluster, namespace) (sum without (state) (cluster_node_peers)) - != 0 - for: 5m - - alert: ClusterSplitBrain - annotations: - message: Cluster nodes have entered a split brain state. - expr: | - sum without (state) (cluster_node_peers) != - on (cluster, namespace) group_left - count by (cluster, namespace) (cluster_node_info) - for: 5m - - alert: ClusterLamportClockDrift - annotations: - message: Cluster nodes' lamport clocks are not converging. - expr: stddev by (cluster, namespace) (cluster_node_lamport_time) > 4 * sqrt(count - by (cluster, namespace) (cluster_node_info)) - for: 5m - - alert: ClusterNodeUnhealthy - annotations: - message: Cluster node is reporting a health score > 0. - expr: | - cluster_node_gossip_health_score > 0 - for: 5m - - alert: ClusterLamportClockStuck - annotations: - message: Cluster nodes's lamport clocks is not progressing. - expr: | - sum by (cluster, namespace, instance) (rate(cluster_node_lamport_time[2m])) == 0 - and on (cluster, namespace, instance) (cluster_node_peers > 1) - for: 5m - - alert: ClusterNodeNameConflict - annotations: - message: A node tried to join the cluster with a name conflicting with an - existing peer. - expr: sum by (cluster, namespace) (rate(cluster_node_gossip_received_events_total{event="node_conflict"}[2m])) - > 0 - for: 10m - - alert: ClusterNodeStuckTerminating - annotations: - message: Cluster node stuck in Terminating state. 
- expr: sum by (cluster, namespace, instance) (cluster_node_peers{state="terminating"}) - > 0 - for: 5m - - alert: ClusterConfigurationDrift - annotations: - message: Cluster nodes are not using the same configuration file. - expr: | - count without (sha256) ( - max by (cluster, namespace, sha256) (agent_config_hash and on(cluster, namespace) cluster_node_info) - ) > 1 - for: 5m - - name: agent_controller - rules: - - alert: SlowComponentEvaluations - annotations: - message: Flow component evaluations are taking too long. - expr: sum by (cluster, namespace, component_id) (rate(agent_component_evaluation_slow_seconds[10m])) - > 0 - for: 15m - - alert: UnhealthyComponents - annotations: - message: Unhealthy Flow components detected. - expr: sum(agent_component_controller_running_components{health_type!="healthy"}) - > 0 - for: 15m ---- -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule +apiVersion: apps/v1 +kind: DaemonSet metadata: - name: mimir-mixin-alerts + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent namespace: monitoring-system spec: - groups: - - name: mimir_alerts - rules: - - alert: MimirIngesterUnhealthy - annotations: - message: Mimir cluster {{ $labels.cluster }}/{{ $labels.namespace }} has {{ - printf "%f" $value }} unhealthy ingester(s). - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterunhealthy - expr: | - min by (cluster, namespace) (cortex_ring_members{state="Unhealthy", name="ingester"}) > 0 - for: 15m - labels: - severity: critical - - alert: MimirRequestErrors - annotations: - message: | - The route {{ $labels.route }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% errors. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrequesterrors - expr: | - 100 * sum by (cluster, namespace, job, route) (rate(cortex_request_duration_seconds_count{status_code=~"5..",route!~"ready|debug_pprof"}[1m])) - / - sum by (cluster, namespace, job, route) (rate(cortex_request_duration_seconds_count{route!~"ready|debug_pprof"}[1m])) - > 1 - for: 15m - labels: - severity: critical - - alert: MimirRequestLatency - annotations: - message: | - {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrequestlatency - expr: | - cluster_namespace_job_route:cortex_request_duration_seconds:99quantile{route!~"metrics|/frontend.Frontend/Process|ready|/schedulerpb.SchedulerForFrontend/FrontendLoop|/schedulerpb.SchedulerForQuerier/QuerierLoop|debug_pprof"} - > - 2.5 - for: 15m - labels: - severity: warning - - alert: MimirQueriesIncorrect - annotations: - message: | - The Mimir cluster {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% incorrect query results. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirqueriesincorrect - expr: | - 100 * sum by (cluster, namespace) (rate(test_exporter_test_case_result_total{result="fail"}[5m])) - / - sum by (cluster, namespace) (rate(test_exporter_test_case_result_total[5m])) > 1 - for: 15m - labels: - severity: warning - - alert: MimirInconsistentRuntimeConfig - annotations: - message: | - An inconsistent runtime config file is used across cluster {{ $labels.cluster }}/{{ $labels.namespace }}. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirinconsistentruntimeconfig - expr: | - count(count by(cluster, namespace, job, sha256) (cortex_runtime_config_hash)) without(sha256) > 1 - for: 1h - labels: - severity: critical - - alert: MimirBadRuntimeConfig - annotations: - message: | - {{ $labels.job }} failed to reload runtime config. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirbadruntimeconfig - expr: | - # The metric value is reset to 0 on error while reloading the config at runtime. - cortex_runtime_config_last_reload_successful == 0 - for: 5m - labels: - severity: critical - - alert: MimirFrontendQueriesStuck - annotations: - message: | - There are {{ $value }} queued up queries in {{ $labels.cluster }}/{{ $labels.namespace }} {{ $labels.job }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirfrontendqueriesstuck - expr: | - sum by (cluster, namespace, job) (min_over_time(cortex_query_frontend_queue_length[1m])) > 0 - for: 5m - labels: - severity: critical - - alert: MimirSchedulerQueriesStuck - annotations: - message: | - There are {{ $value }} queued up queries in {{ $labels.cluster }}/{{ $labels.namespace }} {{ $labels.job }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirschedulerqueriesstuck - expr: | - sum by (cluster, namespace, job) (min_over_time(cortex_query_scheduler_queue_length[1m])) > 0 - for: 7m - labels: - severity: critical - - alert: MimirCacheRequestErrors - annotations: - message: | - The cache {{ $labels.name }} used by Mimir {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% errors for {{ $labels.operation }} operation. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircacherequesterrors - expr: | - ( - sum by(cluster, namespace, name, operation) ( - rate(thanos_memcached_operation_failures_total[1m]) - or - rate(thanos_cache_operation_failures_total[1m]) - ) - / - sum by(cluster, namespace, name, operation) ( - rate(thanos_memcached_operations_total[1m]) - or - rate(thanos_cache_operations_total[1m]) - ) - ) * 100 > 5 - for: 5m - labels: - severity: warning - - alert: MimirIngesterRestarts - annotations: - message: Mimir {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has restarted {{ printf "%.2f" $value }} times in the last 30 mins. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterrestarts - expr: | - ( - sum by(cluster, namespace, pod) ( - increase(kube_pod_container_status_restarts_total{container=~"(ingester|mimir-write)"}[30m]) - ) - >= 2 - ) - and - ( - count by(cluster, namespace, pod) (cortex_build_info) > 0 - ) - labels: - severity: warning - - alert: MimirKVStoreFailure - annotations: - message: | - Mimir {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is failing to talk to the KV store {{ $labels.kv_name }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirkvstorefailure - expr: | - ( - sum by(cluster, namespace, pod, status_code, kv_name) (rate(cortex_kv_request_duration_seconds_count{status_code!~"2.+"}[1m])) - / - sum by(cluster, namespace, pod, status_code, kv_name) (rate(cortex_kv_request_duration_seconds_count[1m])) - ) - # We want to get alerted only in case there's a constant failure. - == 1 - for: 5m - labels: - severity: critical - - alert: MimirMemoryMapAreasTooHigh - annotations: - message: '{{ $labels.job }}/{{ $labels.pod }} has a number of mmap-ed areas - close to the limit.' 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirmemorymapareastoohigh - expr: | - process_memory_map_areas{job=~".*/(ingester.*|cortex|mimir|mimir-write.*|store-gateway.*|cortex|mimir|mimir-backend.*)"} / process_memory_map_areas_limit{job=~".*/(ingester.*|cortex|mimir|mimir-write.*|store-gateway.*|cortex|mimir|mimir-backend.*)"} > 0.8 - for: 5m - labels: - severity: critical - - alert: MimirIngesterInstanceHasNoTenants - annotations: - message: Mimir ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has no tenants assigned. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterinstancehasnotenants - expr: | - (min by(cluster, namespace, pod) (cortex_ingester_memory_users) == 0) - and on (cluster, namespace) - # Only if there are more time-series than would be expected due to continuous testing load - ( - sum by(cluster, namespace) (cortex_ingester_memory_series) - / - max by(cluster, namespace) (cortex_distributor_replication_factor) - ) > 100000 - for: 1h - labels: - severity: warning - - alert: MimirRulerInstanceHasNoRuleGroups - annotations: - message: Mimir ruler {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has no rule groups assigned. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulerinstancehasnorulegroups - expr: | - # Alert on ruler instances in microservices mode that have no rule groups assigned, - min by(cluster, namespace, pod) (cortex_ruler_managers_total{pod=~"(.*mimir-)?ruler.*"}) == 0 - # but only if other ruler instances of the same cell do have rule groups assigned - and on (cluster, namespace) - (max by(cluster, namespace) (cortex_ruler_managers_total) > 0) - # and there are more than two instances overall - and on (cluster, namespace) - (count by (cluster, namespace) (cortex_ruler_managers_total) > 2) - for: 1h - labels: - severity: warning - - alert: MimirIngestedDataTooFarInTheFuture - annotations: - message: Mimir ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has ingested samples with timestamps more than 1h in the future. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesteddatatoofarinthefuture - expr: | - max by(cluster, namespace, pod) ( - cortex_ingester_tsdb_head_max_timestamp_seconds - time() - and - cortex_ingester_tsdb_head_max_timestamp_seconds > 0 - ) > 60*60 - for: 5m - labels: - severity: warning - - alert: MimirRingMembersMismatch - annotations: - message: | - Number of members in Mimir ingester hash ring does not match the expected number in {{ $labels.cluster }}/{{ $labels.namespace }}. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirringmembersmismatch - expr: | - ( - avg by(cluster, namespace) (sum by(cluster, namespace, pod) (cortex_ring_members{name="ingester",job=~".*/(ingester.*|cortex|mimir|mimir-write.*)"})) - != sum by(cluster, namespace) (up{job=~".*/(ingester.*|cortex|mimir|mimir-write.*)"}) - ) - and - ( - count by(cluster, namespace) (cortex_build_info) > 0 - ) - for: 15m - labels: - component: ingester - severity: warning - - name: mimir_instance_limits_alerts - rules: - - alert: MimirIngesterReachingSeriesLimit - annotations: - message: | - Ingester {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its series limit. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterreachingserieslimit - expr: | - ( - (cortex_ingester_memory_series / ignoring(limit) cortex_ingester_instance_limits{limit="max_series"}) - and ignoring (limit) - (cortex_ingester_instance_limits{limit="max_series"} > 0) - ) > 0.8 - for: 3h - labels: - severity: warning - - alert: MimirIngesterReachingSeriesLimit - annotations: - message: | - Ingester {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its series limit. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterreachingserieslimit - expr: | - ( - (cortex_ingester_memory_series / ignoring(limit) cortex_ingester_instance_limits{limit="max_series"}) - and ignoring (limit) - (cortex_ingester_instance_limits{limit="max_series"} > 0) - ) > 0.9 - for: 5m - labels: - severity: critical - - alert: MimirIngesterReachingTenantsLimit - annotations: - message: | - Ingester {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its tenant limit. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterreachingtenantslimit - expr: | - ( - (cortex_ingester_memory_users / ignoring(limit) cortex_ingester_instance_limits{limit="max_tenants"}) - and ignoring (limit) - (cortex_ingester_instance_limits{limit="max_tenants"} > 0) - ) > 0.7 - for: 5m - labels: - severity: warning - - alert: MimirIngesterReachingTenantsLimit - annotations: - message: | - Ingester {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its tenant limit. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterreachingtenantslimit - expr: | - ( - (cortex_ingester_memory_users / ignoring(limit) cortex_ingester_instance_limits{limit="max_tenants"}) - and ignoring (limit) - (cortex_ingester_instance_limits{limit="max_tenants"} > 0) - ) > 0.8 - for: 5m - labels: - severity: critical - - alert: MimirReachingTCPConnectionsLimit - annotations: - message: | - Mimir instance {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its TCP connections limit for {{ $labels.protocol }} protocol. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirreachingtcpconnectionslimit - expr: | - cortex_tcp_connections / cortex_tcp_connections_limit > 0.8 and - cortex_tcp_connections_limit > 0 - for: 5m - labels: - severity: critical - - alert: MimirDistributorReachingInflightPushRequestLimit - annotations: - message: | - Distributor {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its inflight push request limit. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirdistributorreachinginflightpushrequestlimit - expr: | - ( - (cortex_distributor_inflight_push_requests / ignoring(limit) cortex_distributor_instance_limits{limit="max_inflight_push_requests"}) - and ignoring (limit) - (cortex_distributor_instance_limits{limit="max_inflight_push_requests"} > 0) - ) > 0.8 - for: 5m - labels: - severity: critical - - name: mimir-rollout-alerts - rules: - - alert: MimirRolloutStuck - annotations: - message: | - The {{ $labels.rollout_group }} rollout is stuck in {{ $labels.cluster }}/{{ $labels.namespace }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrolloutstuck - expr: | - ( - max without (revision) ( - sum without(statefulset) (label_replace(kube_statefulset_status_current_revision, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?")) - unless - sum without(statefulset) (label_replace(kube_statefulset_status_update_revision, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?")) - ) - * - ( - sum without(statefulset) (label_replace(kube_statefulset_replicas, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?")) - != - sum without(statefulset) (label_replace(kube_statefulset_status_replicas_updated, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?")) - ) - ) and ( - changes(sum without(statefulset) (label_replace(kube_statefulset_status_replicas_updated, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?"))[15m:1m]) - == - 0 - ) - * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - for: 30m - labels: - severity: warning - workload_type: statefulset - - alert: MimirRolloutStuck - annotations: - message: | - The {{ $labels.rollout_group }} rollout is stuck in {{ $labels.cluster }}/{{ $labels.namespace }}. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrolloutstuck - expr: | - ( - sum without(deployment) (label_replace(kube_deployment_spec_replicas, "rollout_group", "$1", "deployment", "(.*?)(?:-zone-[a-z])?")) - != - sum without(deployment) (label_replace(kube_deployment_status_replicas_updated, "rollout_group", "$1", "deployment", "(.*?)(?:-zone-[a-z])?")) - ) and ( - changes(sum without(deployment) (label_replace(kube_deployment_status_replicas_updated, "rollout_group", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"))[15m:1m]) - == - 0 - ) - * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - for: 30m - labels: - severity: warning - workload_type: deployment - - alert: RolloutOperatorNotReconciling - annotations: - message: | - Rollout operator is not reconciling the rollout group {{ $labels.rollout_group }} in {{ $labels.cluster }}/{{ $labels.namespace }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#rolloutoperatornotreconciling - expr: | - max by(cluster, namespace, rollout_group) (time() - rollout_operator_last_successful_group_reconcile_timestamp_seconds) > 600 - for: 5m - labels: - severity: critical - - name: mimir-provisioning - rules: - - alert: MimirAllocatingTooMuchMemory - annotations: - message: | - Instance {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is using too much memory. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirallocatingtoomuchmemory - expr: | - ( - # We use RSS instead of working set memory because of the ingester's extensive usage of mmap. - # See: https://github.com/grafana/mimir/issues/2466 - container_memory_rss{container=~"(ingester|mimir-write|mimir-backend)"} - / - ( container_spec_memory_limit_bytes{container=~"(ingester|mimir-write|mimir-backend)"} > 0 ) - ) - # Match only Mimir namespaces. 
- * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - > 0.65 - for: 15m - labels: - severity: warning - - alert: MimirAllocatingTooMuchMemory - annotations: - message: | - Instance {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is using too much memory. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirallocatingtoomuchmemory - expr: | - ( - # We use RSS instead of working set memory because of the ingester's extensive usage of mmap. - # See: https://github.com/grafana/mimir/issues/2466 - container_memory_rss{container=~"(ingester|mimir-write|mimir-backend)"} - / - ( container_spec_memory_limit_bytes{container=~"(ingester|mimir-write|mimir-backend)"} > 0 ) - ) - # Match only Mimir namespaces. - * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - > 0.8 - for: 15m - labels: - severity: critical - - name: ruler_alerts - rules: - - alert: MimirRulerTooManyFailedPushes - annotations: - message: | - Mimir Ruler {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% write (push) errors. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulertoomanyfailedpushes - expr: | - 100 * ( - sum by (cluster, namespace, pod) (rate(cortex_ruler_write_requests_failed_total[1m])) - / - sum by (cluster, namespace, pod) (rate(cortex_ruler_write_requests_total[1m])) - ) > 1 - for: 5m - labels: - severity: critical - - alert: MimirRulerTooManyFailedQueries - annotations: - message: | - Mimir Ruler {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% errors while evaluating rules. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulertoomanyfailedqueries - expr: | - 100 * ( - sum by (cluster, namespace, pod) (rate(cortex_ruler_queries_failed_total[1m])) - / - sum by (cluster, namespace, pod) (rate(cortex_ruler_queries_total[1m])) - ) > 1 - for: 5m - labels: - severity: critical - - alert: MimirRulerMissedEvaluations - annotations: - message: | - Mimir Ruler {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% missed iterations for the rule group {{ $labels.rule_group }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulermissedevaluations - expr: | - 100 * ( - sum by (cluster, namespace, pod, rule_group) (rate(cortex_prometheus_rule_group_iterations_missed_total[1m])) - / - sum by (cluster, namespace, pod, rule_group) (rate(cortex_prometheus_rule_group_iterations_total[1m])) - ) > 1 - for: 5m - labels: - severity: warning - - alert: MimirRulerFailedRingCheck - annotations: - message: | - Mimir Rulers in {{ $labels.cluster }}/{{ $labels.namespace }} are experiencing errors when checking the ring for rule group ownership. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulerfailedringcheck - expr: | - sum by (cluster, namespace, job) (rate(cortex_ruler_ring_check_errors_total[1m])) - > 0 - for: 5m - labels: - severity: critical - - alert: MimirRulerRemoteEvaluationFailing - annotations: - message: | - Mimir rulers in {{ $labels.cluster }}/{{ $labels.namespace }} are failing to perform {{ printf "%.2f" $value }}% of remote evaluations through the ruler-query-frontend. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulerremoteevaluationfailing - expr: | - 100 * ( - sum by (cluster, namespace) (rate(cortex_request_duration_seconds_count{route="/httpgrpc.HTTP/Handle", status_code=~"5..", job=~".*/(ruler-query-frontend.*)"}[5m])) - / - sum by (cluster, namespace) (rate(cortex_request_duration_seconds_count{route="/httpgrpc.HTTP/Handle", job=~".*/(ruler-query-frontend.*)"}[5m])) - ) > 1 - for: 5m - labels: - severity: warning - - name: gossip_alerts - rules: - - alert: MimirGossipMembersTooHigh - annotations: - message: One or more Mimir instances in {{ $labels.cluster }}/{{ $labels.namespace - }} consistently sees a higher than expected number of gossip members. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirgossipmemberstoohigh - expr: | - max by (cluster, namespace) (memberlist_client_cluster_members_count) - > - (sum by (cluster, namespace) (up{job=~".+/(admin-api|alertmanager|compactor.*|distributor|ingester.*|querier.*|ruler|ruler-querier.*|store-gateway.*|cortex|mimir|mimir-write.*|mimir-read.*|mimir-backend.*)"}) + 10) - for: 20m - labels: - severity: warning - - alert: MimirGossipMembersTooLow - annotations: - message: One or more Mimir instances in {{ $labels.cluster }}/{{ $labels.namespace - }} consistently sees a lower than expected number of gossip members. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirgossipmemberstoolow - expr: | - min by (cluster, namespace) (memberlist_client_cluster_members_count) - < - (sum by (cluster, namespace) (up{job=~".+/(admin-api|alertmanager|compactor.*|distributor|ingester.*|querier.*|ruler|ruler-querier.*|store-gateway.*|cortex|mimir|mimir-write.*|mimir-read.*|mimir-backend.*)"}) * 0.5) - for: 20m - labels: - severity: warning - - name: etcd_alerts - rules: - - alert: EtcdAllocatingTooMuchMemory - annotations: - message: | - Too much memory being used by {{ $labels.namespace }}/{{ $labels.pod }} - bump memory limit. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#etcdallocatingtoomuchmemory - expr: | - ( - container_memory_working_set_bytes{container="etcd"} - / - ( container_spec_memory_limit_bytes{container="etcd"} > 0 ) - ) > 0.65 - for: 15m - labels: - severity: warning - - alert: EtcdAllocatingTooMuchMemory - annotations: - message: | - Too much memory being used by {{ $labels.namespace }}/{{ $labels.pod }} - bump memory limit. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#etcdallocatingtoomuchmemory - expr: | - ( - container_memory_working_set_bytes{container="etcd"} - / - ( container_spec_memory_limit_bytes{container="etcd"} > 0 ) - ) > 0.8 - for: 15m - labels: - severity: critical - - name: alertmanager_alerts - rules: - - alert: MimirAlertmanagerSyncConfigsFailing - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is failing to read tenant configurations from storage. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagersyncconfigsfailing - expr: | - rate(cortex_alertmanager_sync_configs_failed_total[5m]) > 0 - for: 30m - labels: - severity: critical - - alert: MimirAlertmanagerRingCheckFailing - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is unable to check tenants ownership via the ring. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerringcheckfailing - expr: | - rate(cortex_alertmanager_ring_check_errors_total[2m]) > 0 - for: 10m - labels: - severity: critical - - alert: MimirAlertmanagerPartialStateMergeFailing - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is failing to merge partial state changes received from a replica. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerpartialstatemergefailing - expr: | - rate(cortex_alertmanager_partial_state_merges_failed_total[2m]) > 0 - for: 10m - labels: - severity: critical - - alert: MimirAlertmanagerReplicationFailing - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is failing to replicating partial state to its replicas. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerreplicationfailing - expr: | - rate(cortex_alertmanager_state_replication_failed_total[2m]) > 0 - for: 10m - labels: - severity: critical - - alert: MimirAlertmanagerPersistStateFailing - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is unable to persist full state snaphots to remote storage. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerpersiststatefailing - expr: | - rate(cortex_alertmanager_state_persist_failed_total[15m]) > 0 - for: 1h - labels: - severity: critical - - alert: MimirAlertmanagerInitialSyncFailed - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} was unable to obtain some initial state when starting up. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerinitialsyncfailed - expr: | - increase(cortex_alertmanager_state_initial_sync_completed_total{outcome="failed"}[1m]) > 0 - labels: - severity: critical - - alert: MimirAlertmanagerAllocatingTooMuchMemory - annotations: - message: | - Alertmanager {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is using too much memory. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerallocatingtoomuchmemory - expr: | - (container_memory_working_set_bytes{container="alertmanager"} / container_spec_memory_limit_bytes{container="alertmanager"}) > 0.80 - and - (container_spec_memory_limit_bytes{container="alertmanager"} > 0) - for: 15m - labels: - severity: warning - - alert: MimirAlertmanagerAllocatingTooMuchMemory - annotations: - message: | - Alertmanager {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is using too much memory. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerallocatingtoomuchmemory - expr: | - (container_memory_working_set_bytes{container="alertmanager"} / container_spec_memory_limit_bytes{container="alertmanager"}) > 0.90 - and - (container_spec_memory_limit_bytes{container="alertmanager"} > 0) - for: 15m - labels: - severity: critical - - alert: MimirAlertmanagerInstanceHasNoTenants - annotations: - message: Mimir alertmanager {{ $labels.pod }} in {{ $labels.cluster }}/{{ - $labels.namespace }} owns no tenants. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerinstancehasnotenants - expr: | - # Alert on alertmanager instances in microservices mode that own no tenants, - min by(cluster, namespace, pod) (cortex_alertmanager_tenants_owned{pod=~"(.*mimir-)?alertmanager.*"}) == 0 - # but only if other instances of the same cell do have tenants assigned. - and on (cluster, namespace) - max by(cluster, namespace) (cortex_alertmanager_tenants_owned) > 0 - for: 1h - labels: - severity: warning - - name: mimir_blocks_alerts - rules: - - alert: MimirIngesterHasNotShippedBlocks - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not shipped any block in the last 4 hours. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterhasnotshippedblocks - expr: | - (min by(cluster, namespace, pod) (time() - cortex_ingester_shipper_last_successful_upload_timestamp_seconds) > 60 * 60 * 4) - and - (max by(cluster, namespace, pod) (cortex_ingester_shipper_last_successful_upload_timestamp_seconds) > 0) - and - # Only if the ingester has ingested samples over the last 4h. - (max by(cluster, namespace, pod) (max_over_time(cluster_namespace_pod:cortex_ingester_ingested_samples_total:rate1m[4h])) > 0) - and - # Only if the ingester was ingesting samples 4h ago. 
This protects against the case where the ingester replica - # had ingested samples in the past, then no traffic was received for a long period and then it starts - # receiving samples again. Without this check, the alert would fire as soon as it gets back receiving - # samples, while the a block shipping is expected within the next 4h. - (max by(cluster, namespace, pod) (max_over_time(cluster_namespace_pod:cortex_ingester_ingested_samples_total:rate1m[1h] offset 4h)) > 0) - for: 15m - labels: - severity: critical - - alert: MimirIngesterHasNotShippedBlocksSinceStart - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not shipped any block in the last 4 hours. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterhasnotshippedblockssincestart - expr: | - (max by(cluster, namespace, pod) (cortex_ingester_shipper_last_successful_upload_timestamp_seconds) == 0) - and - (max by(cluster, namespace, pod) (max_over_time(cluster_namespace_pod:cortex_ingester_ingested_samples_total:rate1m[4h])) > 0) - for: 4h - labels: - severity: critical - - alert: MimirIngesterHasUnshippedBlocks - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has compacted a block {{ $value | humanizeDuration }} ago but it hasn't - been successfully uploaded to the storage yet. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterhasunshippedblocks - expr: | - (time() - cortex_ingester_oldest_unshipped_block_timestamp_seconds > 3600) - and - (cortex_ingester_oldest_unshipped_block_timestamp_seconds > 0) - for: 15m - labels: - severity: critical - - alert: MimirIngesterTSDBHeadCompactionFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to compact TSDB head. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbheadcompactionfailed - expr: | - rate(cortex_ingester_tsdb_compactions_failed_total[5m]) > 0 - for: 15m - labels: - severity: critical - - alert: MimirIngesterTSDBHeadTruncationFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to truncate TSDB head. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbheadtruncationfailed - expr: | - rate(cortex_ingester_tsdb_head_truncations_failed_total[5m]) > 0 - labels: - severity: critical - - alert: MimirIngesterTSDBCheckpointCreationFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to create TSDB checkpoint. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbcheckpointcreationfailed - expr: | - rate(cortex_ingester_tsdb_checkpoint_creations_failed_total[5m]) > 0 - labels: - severity: critical - - alert: MimirIngesterTSDBCheckpointDeletionFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to delete TSDB checkpoint. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbcheckpointdeletionfailed - expr: | - rate(cortex_ingester_tsdb_checkpoint_deletions_failed_total[5m]) > 0 - labels: - severity: critical - - alert: MimirIngesterTSDBWALTruncationFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to truncate TSDB WAL. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbwaltruncationfailed - expr: | - rate(cortex_ingester_tsdb_wal_truncations_failed_total[5m]) > 0 - labels: - severity: warning - - alert: MimirIngesterTSDBWALCorrupted - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} got a corrupted TSDB WAL. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbwalcorrupted - expr: | - # alert when there are more than one corruptions - count by (cluster, namespace) (rate(cortex_ingester_tsdb_wal_corruptions_total[5m]) > 0) > 1 - and - # and there is only one zone - count by (cluster, namespace) (group by (cluster, namespace, job) (cortex_ingester_tsdb_wal_corruptions_total)) == 1 - labels: - deployment: single-zone - severity: critical - - alert: MimirIngesterTSDBWALCorrupted - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} got a corrupted TSDB WAL. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbwalcorrupted - expr: | - # alert when there are more than one corruptions - count by (cluster, namespace) (sum by (cluster, namespace, job) (rate(cortex_ingester_tsdb_wal_corruptions_total[5m]) > 0)) > 1 - and - # and there are multiple zones - count by (cluster, namespace) (group by (cluster, namespace, job) (cortex_ingester_tsdb_wal_corruptions_total)) > 1 - labels: - deployment: multi-zone - severity: critical - - alert: MimirIngesterTSDBWALWritesFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to write to TSDB WAL. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbwalwritesfailed - expr: | - rate(cortex_ingester_tsdb_wal_writes_failed_total[1m]) > 0 - for: 3m - labels: - severity: critical - - alert: MimirStoreGatewayHasNotSyncTheBucket - annotations: - message: Mimir store-gateway {{ $labels.pod }} in {{ $labels.cluster }}/{{ - $labels.namespace }} has not successfully synched the bucket since {{ $value - | humanizeDuration }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirstoregatewayhasnotsyncthebucket - expr: | - (time() - cortex_bucket_stores_blocks_last_successful_sync_timestamp_seconds{component="store-gateway"} > 60 * 30) - and - cortex_bucket_stores_blocks_last_successful_sync_timestamp_seconds{component="store-gateway"} > 0 - for: 5m - labels: - severity: critical - - alert: MimirStoreGatewayNoSyncedTenants - annotations: - message: Mimir store-gateway {{ $labels.pod }} in {{ $labels.cluster }}/{{ - $labels.namespace }} is not syncing any blocks for any tenant. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirstoregatewaynosyncedtenants - expr: | - min by(cluster, namespace, pod) (cortex_bucket_stores_tenants_synced{component="store-gateway"}) == 0 - for: 1h - labels: - severity: warning - - alert: MimirBucketIndexNotUpdated - annotations: - message: Mimir bucket index for tenant {{ $labels.user }} in {{ $labels.cluster - }}/{{ $labels.namespace }} has not been updated since {{ $value | humanizeDuration - }}. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirbucketindexnotupdated - expr: | - min by(cluster, namespace, user) (time() - cortex_bucket_index_last_successful_update_timestamp_seconds) > 7200 - labels: - severity: critical - - name: mimir_compactor_alerts - rules: - - alert: MimirCompactorHasNotSuccessfullyCleanedUpBlocks - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not successfully cleaned up blocks in the last 6 hours. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotsuccessfullycleanedupblocks - expr: | - # The "last successful run" metric is updated even if the compactor owns no tenants, - # so this alert correctly doesn't fire if compactor has nothing to do. - (time() - cortex_compactor_block_cleanup_last_successful_run_timestamp_seconds > 60 * 60 * 6) - for: 1h - labels: - severity: critical - - alert: MimirCompactorHasNotSuccessfullyRunCompaction - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not run compaction in the last 24 hours. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotsuccessfullyruncompaction - expr: | - # The "last successful run" metric is updated even if the compactor owns no tenants, - # so this alert correctly doesn't fire if compactor has nothing to do. - (time() - cortex_compactor_last_successful_run_timestamp_seconds > 60 * 60 * 24) - and - (cortex_compactor_last_successful_run_timestamp_seconds > 0) - for: 1h - labels: - reason: in-last-24h - severity: critical - - alert: MimirCompactorHasNotSuccessfullyRunCompaction - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not run compaction in the last 24 hours. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotsuccessfullyruncompaction - expr: | - # The "last successful run" metric is updated even if the compactor owns no tenants, - # so this alert correctly doesn't fire if compactor has nothing to do. - cortex_compactor_last_successful_run_timestamp_seconds == 0 - for: 24h - labels: - reason: since-startup - severity: critical - - alert: MimirCompactorHasNotSuccessfullyRunCompaction - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} failed to run 2 consecutive compactions. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotsuccessfullyruncompaction - expr: | - increase(cortex_compactor_runs_failed_total{reason!="shutdown"}[2h]) >= 2 - labels: - reason: consecutive-failures - severity: critical - - alert: MimirCompactorHasNotUploadedBlocks - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not uploaded any block in the last 24 hours. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotuploadedblocks - expr: | - (time() - (max by(cluster, namespace, pod) (thanos_objstore_bucket_last_successful_upload_time{component="compactor"})) > 60 * 60 * 24) - and - (max by(cluster, namespace, pod) (thanos_objstore_bucket_last_successful_upload_time{component="compactor"}) > 0) - and - # Only if some compactions have started. We don't want to fire this alert if the compactor has nothing to do - # (e.g. there are more replicas than required because running as part of mimir-backend). 
- (sum by(cluster, namespace, pod) (rate(cortex_compactor_group_compaction_runs_started_total[24h])) > 0) - for: 15m - labels: - severity: critical - time_period: 24h - - alert: MimirCompactorHasNotUploadedBlocks - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not uploaded any block since its start. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotuploadedblocks - expr: | - (max by(cluster, namespace, pod) (thanos_objstore_bucket_last_successful_upload_time{component="compactor"}) == 0) - and - # Only if some compactions have started. We don't want to fire this alert if the compactor has nothing to do - # (e.g. there are more replicas than required because running as part of mimir-backend). - (sum by(cluster, namespace, pod) (rate(cortex_compactor_group_compaction_runs_started_total[24h])) > 0) - for: 24h - labels: - severity: critical - time_period: since-start - - alert: MimirCompactorSkippedUnhealthyBlocks - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has found and ignored unhealthy blocks. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorskippedunhealthyblocks - expr: | - increase(cortex_compactor_blocks_marked_for_no_compaction_total[5m]) > 0 - for: 1m - labels: - severity: warning - - alert: MimirCompactorSkippedUnhealthyBlocks - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has found and ignored unhealthy blocks. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorskippedunhealthyblocks - expr: | - increase(cortex_compactor_blocks_marked_for_no_compaction_total[5m]) > 1 - for: 30m - labels: - severity: critical - - name: mimir_autoscaling - rules: - - alert: MimirAutoscalerNotActive - annotations: - message: The Horizontal Pod Autoscaler (HPA) {{ $labels.horizontalpodautoscaler - }} in {{ $labels.namespace }} is not active. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirautoscalernotactive - expr: | - ( - label_replace(( - kube_horizontalpodautoscaler_status_condition{condition="ScalingActive",status="false"} - # Match only Mimir namespaces. - * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - # Add "metric" label. - + on(cluster, namespace, horizontalpodautoscaler) group_right label_replace(kube_horizontalpodautoscaler_spec_target_metric*0, "metric", "$1", "metric_name", "(.+)") - > 0), - "scaledObject", "$1", "horizontalpodautoscaler", "keda-hpa-(.*)" - ) - ) - # Alert only if the scaling metric exists and is > 0. If the KEDA ScaledObject is configured to scale down 0, - # then HPA ScalingActive may be false when expected to run 0 replicas. In this case, the scaling metric exported - # by KEDA could not exist at all or being exposed with a value of 0. - and on (cluster, namespace, metric, scaledObject) - (label_replace(keda_scaler_metrics_value, "namespace", "$0", "exported_namespace", ".+") > 0) - for: 1h - labels: - severity: critical - - alert: MimirAutoscalerKedaFailing - annotations: - message: The Keda ScaledObject {{ $labels.scaledObject }} in {{ $labels.namespace - }} is experiencing errors. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirautoscalerkedafailing - expr: | - ( - # Find KEDA scalers reporting errors. 
- label_replace(rate(keda_scaler_errors[5m]), "namespace", "$1", "exported_namespace", "(.*)") - # Match only Mimir namespaces. - * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - ) - > 0 - for: 1h - labels: - severity: critical - - name: mimir_continuous_test - rules: - - alert: MimirContinuousTestNotRunningOnWrites - annotations: - message: Mimir continuous test {{ $labels.test }} in {{ $labels.cluster }}/{{ - $labels.namespace }} is not effectively running because writes are failing. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircontinuoustestnotrunningonwrites - expr: | - sum by(cluster, namespace, test) (rate(mimir_continuous_test_writes_failed_total[5m])) > 0 - for: 1h - labels: - severity: warning - - alert: MimirContinuousTestNotRunningOnReads - annotations: - message: Mimir continuous test {{ $labels.test }} in {{ $labels.cluster }}/{{ - $labels.namespace }} is not effectively running because queries are failing. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircontinuoustestnotrunningonreads - expr: | - sum by(cluster, namespace, test) (rate(mimir_continuous_test_queries_failed_total[5m])) > 0 - for: 1h - labels: - severity: warning - - alert: MimirContinuousTestFailed + minReadySeconds: 10 + selector: + matchLabels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + template: + metadata: annotations: - message: Mimir continuous test {{ $labels.test }} in {{ $labels.cluster }}/{{ - $labels.namespace }} failed when asserting query results. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircontinuoustestfailed - expr: | - sum by(cluster, namespace, test) (rate(mimir_continuous_test_query_result_checks_failed_total[10m])) > 0 + kubectl.kubernetes.io/default-container: grafana-agent + logs.agent.grafana.com/scrape: "true" + logs.agent.grafana.com/scrub-level: debug + profiles.grafana.com/cpu.port_name: http-metrics + profiles.grafana.com/cpu.scrape: "false" + profiles.grafana.com/goroutine.port_name: http-metrics + profiles.grafana.com/goroutine.scrape: "false" + profiles.grafana.com/memory.port_name: http-metrics + profiles.grafana.com/memory.scrape: "false" + pyroscope.io/service_name: grafana-agent labels: - severity: warning + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + spec: + containers: + - args: + - run + - /etc/agent/config.river + - --storage.path=/tmp/agent + - --server.http.listen-addr=0.0.0.0:80 + - --server.http.ui-path-prefix=/ + - --disable-reporting + - --cluster.enabled=true + - --cluster.join-addresses=grafana-agent-cluster + env: + - name: AGENT_MODE + value: flow + - name: AGENT_DEPLOY_MODE + value: helm + - name: HOSTNAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + envFrom: + - secretRef: + name: agent-env + optional: true + image: docker.io/grafana/agent:v0.40.3 + imagePullPolicy: IfNotPresent + name: grafana-agent + ports: + - containerPort: 80 + name: http-metrics + - containerPort: 4317 + name: grpc-otlp + protocol: TCP + - containerPort: 4318 + name: http-otlp + protocol: TCP + - containerPort: 9411 + name: zipkin + protocol: TCP + - containerPort: 6831 + name: jaeger-compact + protocol: UDP + readinessProbe: + httpGet: + path: /-/ready + port: 80 + scheme: HTTP + initialDelaySeconds: 10 + timeoutSeconds: 1 + volumeMounts: + - mountPath: /etc/agent + name: config + - mountPath: /var/log + name: varlog + readOnly: true + - mountPath: /etc/agent-modules + name: agent-modules + - args: + - 
--volume-dir=/etc/agent + - --webhook-url=http://localhost:80/-/reload + image: ghcr.io/jimmidyson/configmap-reload:v0.12.0 + name: config-reloader + resources: + requests: + cpu: 1m + memory: 5Mi + volumeMounts: + - mountPath: /etc/agent + name: config + dnsPolicy: ClusterFirst + nodeSelector: + kubernetes.io/os: linux + serviceAccountName: grafana-agent + volumes: + - configMap: + name: agent-config-h9mgdthkmd + name: config + - hostPath: + path: /var/log + name: varlog + - configMap: + name: agent-modules-cf8t5bf7t9 + name: agent-modules + updateStrategy: + rollingUpdate: + maxSurge: 0 + maxUnavailable: 2 + type: RollingUpdate --- apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule +kind: ServiceMonitor metadata: - name: mimir-mixin-rules + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent namespace: monitoring-system spec: - groups: - - name: mimir_api_1 - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_request_duration_seconds:50quantile - - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job) / - sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job) - record: cluster_job:cortex_request_duration_seconds:avg - - expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, - job) - record: cluster_job:cortex_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job) - record: cluster_job:cortex_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_request_duration_seconds_count[1m])) 
by (cluster, job) - record: cluster_job:cortex_request_duration_seconds_count:sum_rate - - expr: sum(rate(cortex_request_duration_seconds[1m])) by (cluster, job) - record: cluster_job:cortex_request_duration_seconds:sum_rate - - name: mimir_api_2 - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, job, route)) - record: cluster_job_route:cortex_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, job, route)) - record: cluster_job_route:cortex_request_duration_seconds:50quantile - - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job, route) - / sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job, route) - record: cluster_job_route:cortex_request_duration_seconds:avg - - expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, - job, route) - record: cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job, route) - record: cluster_job_route:cortex_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job, - route) - record: cluster_job_route:cortex_request_duration_seconds_count:sum_rate - - expr: sum(rate(cortex_request_duration_seconds[1m])) by (cluster, job, route) - record: cluster_job_route:cortex_request_duration_seconds:sum_rate - - name: mimir_api_3 - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, namespace, job, route)) - record: cluster_namespace_job_route:cortex_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, namespace, job, route)) - record: cluster_namespace_job_route:cortex_request_duration_seconds:50quantile - - expr: 
sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, namespace, - job, route) / sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, - namespace, job, route) - record: cluster_namespace_job_route:cortex_request_duration_seconds:avg - - expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, - namespace, job, route) - record: cluster_namespace_job_route:cortex_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, namespace, - job, route) - record: cluster_namespace_job_route:cortex_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, namespace, - job, route) - record: cluster_namespace_job_route:cortex_request_duration_seconds_count:sum_rate - - expr: sum(rate(cortex_request_duration_seconds[1m])) by (cluster, namespace, - job, route) - record: cluster_namespace_job_route:cortex_request_duration_seconds:sum_rate - - name: mimir_querier_api - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_querier_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_querier_request_duration_seconds:50quantile - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - job) / sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, - job) - record: cluster_job:cortex_querier_request_duration_seconds:avg - - expr: sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, - cluster, job) - record: cluster_job:cortex_querier_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - job) - record: cluster_job:cortex_querier_request_duration_seconds_sum:sum_rate - - 
expr: sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, - job) - record: cluster_job:cortex_querier_request_duration_seconds_count:sum_rate - - expr: histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, job, route)) - record: cluster_job_route:cortex_querier_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, job, route)) - record: cluster_job_route:cortex_querier_request_duration_seconds:50quantile - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - job, route) / sum(rate(cortex_querier_request_duration_seconds_count[1m])) - by (cluster, job, route) - record: cluster_job_route:cortex_querier_request_duration_seconds:avg - - expr: sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, - cluster, job, route) - record: cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - job, route) - record: cluster_job_route:cortex_querier_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, - job, route) - record: cluster_job_route:cortex_querier_request_duration_seconds_count:sum_rate - - expr: histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, namespace, job, route)) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, namespace, job, route)) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds:50quantile - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - namespace, job, route) / 
sum(rate(cortex_querier_request_duration_seconds_count[1m])) - by (cluster, namespace, job, route) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds:avg - - expr: sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, - cluster, namespace, job, route) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - namespace, job, route) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, - namespace, job, route) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds_count:sum_rate - - name: mimir_storage - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_kv_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_kv_request_duration_seconds:50quantile - - expr: sum(rate(cortex_kv_request_duration_seconds_sum[1m])) by (cluster, job) - / sum(rate(cortex_kv_request_duration_seconds_count[1m])) by (cluster, job) - record: cluster_job:cortex_kv_request_duration_seconds:avg - - expr: sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) by (le, cluster, - job) - record: cluster_job:cortex_kv_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_kv_request_duration_seconds_sum[1m])) by (cluster, job) - record: cluster_job:cortex_kv_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_kv_request_duration_seconds_count[1m])) by (cluster, job) - record: cluster_job:cortex_kv_request_duration_seconds_count:sum_rate - - name: mimir_queries - rules: - - expr: histogram_quantile(0.99, 
sum(rate(cortex_query_frontend_retries_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_query_frontend_retries:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_query_frontend_retries_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_query_frontend_retries:50quantile - - expr: sum(rate(cortex_query_frontend_retries_sum[1m])) by (cluster, job) / sum(rate(cortex_query_frontend_retries_count[1m])) - by (cluster, job) - record: cluster_job:cortex_query_frontend_retries:avg - - expr: sum(rate(cortex_query_frontend_retries_bucket[1m])) by (le, cluster, job) - record: cluster_job:cortex_query_frontend_retries_bucket:sum_rate - - expr: sum(rate(cortex_query_frontend_retries_sum[1m])) by (cluster, job) - record: cluster_job:cortex_query_frontend_retries_sum:sum_rate - - expr: sum(rate(cortex_query_frontend_retries_count[1m])) by (cluster, job) - record: cluster_job:cortex_query_frontend_retries_count:sum_rate - - expr: histogram_quantile(0.99, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_query_frontend_queue_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_query_frontend_queue_duration_seconds:50quantile - - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_sum[1m])) by (cluster, - job) / sum(rate(cortex_query_frontend_queue_duration_seconds_count[1m])) by - (cluster, job) - record: cluster_job:cortex_query_frontend_queue_duration_seconds:avg - - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) by - (le, cluster, job) - record: cluster_job:cortex_query_frontend_queue_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_sum[1m])) by (cluster, - job) - record: cluster_job:cortex_query_frontend_queue_duration_seconds_sum:sum_rate - - expr: 
sum(rate(cortex_query_frontend_queue_duration_seconds_count[1m])) by (cluster, - job) - record: cluster_job:cortex_query_frontend_queue_duration_seconds_count:sum_rate - - name: mimir_ingester_queries - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_series_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_series:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_series_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_series:50quantile - - expr: sum(rate(cortex_ingester_queried_series_sum[1m])) by (cluster, job) / - sum(rate(cortex_ingester_queried_series_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_series:avg - - expr: sum(rate(cortex_ingester_queried_series_bucket[1m])) by (le, cluster, - job) - record: cluster_job:cortex_ingester_queried_series_bucket:sum_rate - - expr: sum(rate(cortex_ingester_queried_series_sum[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_series_sum:sum_rate - - expr: sum(rate(cortex_ingester_queried_series_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_series_count:sum_rate - - expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_samples_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_samples:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_samples_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_samples:50quantile - - expr: sum(rate(cortex_ingester_queried_samples_sum[1m])) by (cluster, job) / - sum(rate(cortex_ingester_queried_samples_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_samples:avg - - expr: sum(rate(cortex_ingester_queried_samples_bucket[1m])) by (le, cluster, - job) - record: cluster_job:cortex_ingester_queried_samples_bucket:sum_rate - - expr: 
sum(rate(cortex_ingester_queried_samples_sum[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_samples_sum:sum_rate - - expr: sum(rate(cortex_ingester_queried_samples_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_samples_count:sum_rate - - expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_exemplars_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_exemplars:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_exemplars_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_exemplars:50quantile - - expr: sum(rate(cortex_ingester_queried_exemplars_sum[1m])) by (cluster, job) - / sum(rate(cortex_ingester_queried_exemplars_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_exemplars:avg - - expr: sum(rate(cortex_ingester_queried_exemplars_bucket[1m])) by (le, cluster, - job) - record: cluster_job:cortex_ingester_queried_exemplars_bucket:sum_rate - - expr: sum(rate(cortex_ingester_queried_exemplars_sum[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_exemplars_sum:sum_rate - - expr: sum(rate(cortex_ingester_queried_exemplars_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_exemplars_count:sum_rate - - name: mimir_received_samples - rules: - - expr: | - sum by (cluster, namespace, job) (rate(cortex_distributor_received_samples_total[5m])) - record: cluster_namespace_job:cortex_distributor_received_samples:rate5m - - name: mimir_exemplars_in - rules: - - expr: | - sum by (cluster, namespace, job) (rate(cortex_distributor_exemplars_in_total[5m])) - record: cluster_namespace_job:cortex_distributor_exemplars_in:rate5m - - name: mimir_received_exemplars - rules: - - expr: | - sum by (cluster, namespace, job) (rate(cortex_distributor_received_exemplars_total[5m])) - record: cluster_namespace_job:cortex_distributor_received_exemplars:rate5m - - name: 
mimir_exemplars_ingested - rules: - - expr: | - sum by (cluster, namespace, job) (rate(cortex_ingester_ingested_exemplars_total[5m])) - record: cluster_namespace_job:cortex_ingester_ingested_exemplars:rate5m - - name: mimir_exemplars_appended - rules: - - expr: | - sum by (cluster, namespace, job) (rate(cortex_ingester_tsdb_exemplar_exemplars_appended_total[5m])) - record: cluster_namespace_job:cortex_ingester_tsdb_exemplar_exemplars_appended:rate5m - - name: mimir_scaling_rules - rules: - - expr: | - # Convenience rule to get the number of replicas for both a deployment and a statefulset. - # Multi-zone deployments are grouped together removing the "zone-X" suffix. - sum by (cluster, namespace, deployment) ( - label_replace( - kube_deployment_spec_replicas, - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - or - sum by (cluster, namespace, deployment) ( - label_replace(kube_statefulset_replicas, "deployment", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?") - ) - record: cluster_namespace_deployment:actual_replicas:count - - expr: | - ceil( - quantile_over_time(0.99, - sum by (cluster, namespace) ( - cluster_namespace_job:cortex_distributor_received_samples:rate5m - )[24h:] - ) - / 240000 - ) - labels: - deployment: distributor - reason: sample_rate - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - sum by (cluster, namespace) (cortex_limits_overrides{limit_name="ingestion_rate"}) - * 0.59999999999999998 / 240000 - ) - labels: - deployment: distributor - reason: sample_rate_limits - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - quantile_over_time(0.99, - sum by (cluster, namespace) ( - cluster_namespace_job:cortex_distributor_received_samples:rate5m - )[24h:] - ) - * 3 / 80000 - ) - labels: - deployment: ingester - 
reason: sample_rate - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - quantile_over_time(0.99, - sum by(cluster, namespace) ( - cortex_ingester_memory_series - )[24h:] - ) - / 1500000 - ) - labels: - deployment: ingester - reason: active_series - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - sum by (cluster, namespace) (cortex_limits_overrides{limit_name="max_global_series_per_user"}) - * 3 * 0.59999999999999998 / 1500000 - ) - labels: - deployment: ingester - reason: active_series_limits - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - sum by (cluster, namespace) (cortex_limits_overrides{limit_name="ingestion_rate"}) - * 0.59999999999999998 / 80000 - ) - labels: - deployment: ingester - reason: sample_rate_limits - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - (sum by (cluster, namespace) ( - cortex_ingester_tsdb_storage_blocks_bytes{job=~".+/ingester.*"} - ) / 4) - / - avg by (cluster, namespace) ( - memcached_limit_bytes{job=~".+/memcached"} - ) - ) - labels: - deployment: memcached - reason: active_series - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - sum by (cluster, namespace, pod)(rate(container_cpu_usage_seconds_total[1m])), - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - record: cluster_namespace_deployment:container_cpu_usage_seconds_total:sum_rate - - expr: | - # Convenience rule to get the CPU request for both a deployment and a statefulset. - # Multi-zone deployments are grouped together removing the "zone-X" suffix. 
- # This recording rule is made compatible with the breaking changes introduced in kube-state-metrics v2 - # that remove resource metrics, ref: - # - https://github.com/kubernetes/kube-state-metrics/blob/master/CHANGELOG.md#v200-alpha--2020-09-16 - # - https://github.com/kubernetes/kube-state-metrics/pull/1004 - # - # This is the old expression, compatible with kube-state-metrics < v2.0.0, - # where kube_pod_container_resource_requests_cpu_cores was removed: - ( - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - kube_pod_container_resource_requests_cpu_cores, - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - ) - or - # This expression is compatible with kube-state-metrics >= v1.4.0, - # where kube_pod_container_resource_requests was introduced. - ( - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - kube_pod_container_resource_requests{resource="cpu"}, - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - ) - record: cluster_namespace_deployment:kube_pod_container_resource_requests_cpu_cores:sum - - expr: | - # Jobs should be sized to their CPU usage. - # We do this by comparing 99th percentile usage over the last 24hrs to - # their current provisioned #replicas and resource requests. 
- ceil( - cluster_namespace_deployment:actual_replicas:count - * - quantile_over_time(0.99, cluster_namespace_deployment:container_cpu_usage_seconds_total:sum_rate[24h]) - / - cluster_namespace_deployment:kube_pod_container_resource_requests_cpu_cores:sum - ) - labels: - reason: cpu_usage - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - # Convenience rule to get the Memory utilization for both a deployment and a statefulset. - # Multi-zone deployments are grouped together removing the "zone-X" suffix. - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - container_memory_usage_bytes{image!=""}, - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - record: cluster_namespace_deployment:container_memory_usage_bytes:sum - - expr: | - # Convenience rule to get the Memory request for both a deployment and a statefulset. - # Multi-zone deployments are grouped together removing the "zone-X" suffix. 
- # This recording rule is made compatible with the breaking changes introduced in kube-state-metrics v2 - # that remove resource metrics, ref: - # - https://github.com/kubernetes/kube-state-metrics/blob/master/CHANGELOG.md#v200-alpha--2020-09-16 - # - https://github.com/kubernetes/kube-state-metrics/pull/1004 - # - # This is the old expression, compatible with kube-state-metrics < v2.0.0, - # where kube_pod_container_resource_requests_memory_bytes was removed: - ( - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - kube_pod_container_resource_requests_memory_bytes, - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - ) - or - # This expression is compatible with kube-state-metrics >= v1.4.0, - # where kube_pod_container_resource_requests was introduced. - ( - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - kube_pod_container_resource_requests{resource="memory"}, - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - ) - record: cluster_namespace_deployment:kube_pod_container_resource_requests_memory_bytes:sum - - expr: | - # Jobs should be sized to their Memory usage. - # We do this by comparing 99th percentile usage over the last 24hrs to - # their current provisioned #replicas and resource requests. 
- ceil( - cluster_namespace_deployment:actual_replicas:count - * - quantile_over_time(0.99, cluster_namespace_deployment:container_memory_usage_bytes:sum[24h]) - / - cluster_namespace_deployment:kube_pod_container_resource_requests_memory_bytes:sum - ) - labels: - reason: memory_usage - record: cluster_namespace_deployment_reason:required_replicas:count - - name: mimir_alertmanager_rules - rules: - - expr: | - sum by (cluster, job, pod) (cortex_alertmanager_alerts) - record: cluster_job_pod:cortex_alertmanager_alerts:sum - - expr: | - sum by (cluster, job, pod) (cortex_alertmanager_silences) - record: cluster_job_pod:cortex_alertmanager_silences:sum - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_alerts_received_total[5m])) - record: cluster_job:cortex_alertmanager_alerts_received_total:rate5m - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_alerts_invalid_total[5m])) - record: cluster_job:cortex_alertmanager_alerts_invalid_total:rate5m - - expr: | - sum by (cluster, job, integration) (rate(cortex_alertmanager_notifications_total[5m])) - record: cluster_job_integration:cortex_alertmanager_notifications_total:rate5m - - expr: | - sum by (cluster, job, integration) (rate(cortex_alertmanager_notifications_failed_total[5m])) - record: cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_state_replication_total[5m])) - record: cluster_job:cortex_alertmanager_state_replication_total:rate5m - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_state_replication_failed_total[5m])) - record: cluster_job:cortex_alertmanager_state_replication_failed_total:rate5m - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_partial_state_merges_total[5m])) - record: cluster_job:cortex_alertmanager_partial_state_merges_total:rate5m - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_partial_state_merges_failed_total[5m])) - record: 
cluster_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m - - name: mimir_ingester_rules - rules: - - expr: | - sum by(cluster, namespace, pod) (rate(cortex_ingester_ingested_samples_total[1m])) - record: cluster_namespace_pod:cortex_ingester_ingested_samples_total:rate1m + endpoints: + - honorLabels: true + port: http-metrics + scheme: http + selector: + matchLabels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent --- apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor @@ -46691,3 +2585,37 @@ spec: app.kubernetes.io/component: mimir app.kubernetes.io/instance: mimir-monolithic-mode app.kubernetes.io/name: mimir +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +spec: + rules: + - host: grafana-agent.localhost + http: + paths: + - backend: + service: + name: grafana-agent + port: + number: 80 + path: / + pathType: Prefix + - host: agent.localhost + http: + paths: + - backend: + service: + name: grafana-agent + port: + number: 80 + path: / + pathType: Prefix diff --git a/kubernetes/microservices-mode/traces/kustomization.yaml b/kubernetes/microservices-mode/traces/kustomization.yaml index a24d15e8..0697b93b 100644 --- a/kubernetes/microservices-mode/traces/kustomization.yaml +++ b/kubernetes/microservices-mode/traces/kustomization.yaml @@ -8,11 +8,11 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: +- ../../common/grafana-agent - tempo-distributed + +# optional - ../../monolithic-mode/metrics/mimir -- ../../../monitoring-mixins/agent-flow-mixin/deploy -- ../../../monitoring-mixins/go-runtime-mixin/deploy -- ../../../monitoring-mixins/mimir-mixin/deploy secretGenerator: - name: tempo-distributed-env @@ -28,19 
+28,10 @@ secretGenerator: configMapGenerator: - name: agent-config namespace: monitoring-system - options: - disableNameSuffixHash: true + behavior: replace files: - configs/config.river -- name: grafana-datasources - namespace: monitoring-system - options: - labels: - grafana_datasource: "1" - files: - - datasources.yaml=configs/grafana-datasources-tempo.yaml - - name: tempo-distributed-config namespace: tracing-system behavior: replace diff --git a/kubernetes/monolithic-mode/all-in-one/configs/config.river b/kubernetes/monolithic-mode/all-in-one/configs/config.river index 8ebbb6f8..8d77aa9f 100644 --- a/kubernetes/monolithic-mode/all-in-one/configs/config.river +++ b/kubernetes/monolithic-mode/all-in-one/configs/config.river @@ -8,6 +8,9 @@ logging { format = "logfmt" } +/******************************************** + * Grafana LGTMP Stack Receiver Provider + ********************************************/ module.file "lgtmp" { filename = coalesce(env("AGENT_CONFIG_FOLDER"), "/etc/agent-modules") + "/lgtmp.river" diff --git a/kubernetes/monolithic-mode/all-in-one/configs/grafana-datasources-all-in-one.yaml b/kubernetes/monolithic-mode/all-in-one/configs/grafana-datasources-all-in-one.yaml deleted file mode 100644 index ac1358c2..00000000 --- a/kubernetes/monolithic-mode/all-in-one/configs/grafana-datasources-all-in-one.yaml +++ /dev/null @@ -1,108 +0,0 @@ -apiVersion: 1 - -deleteDatasources: -- name: Metrics - uid: metrics -- name: Logs - uid: logs -- name: Traces - uid: traces -- name: Profiles - uid: profiles - -datasources: -# Mimir for metrics -- name: Metrics - type: prometheus - uid: metrics - access: proxy - url: http://nginx.gateway.svc.cluster.local:8080/prometheus - basicAuth: false - isDefault: true - version: 1 - editable: true - jsonData: - prometheusType: Mimir - exemplarTraceIdDestinations: - - name: traceID - datasourceUid: traces - -# Loki for logs -- name: Logs - type: loki - uid: logs - access: proxy - url: 
http://nginx.gateway.svc.cluster.local:3100 - basicAuth: false - isDefault: false - version: 1 - editable: true - jsonData: - derivedFields: - - datasourceUid: traces - matcherRegex: "[tT]race_?[iI][dD]\"?[:=]\"?(\\w+)" - name: traceID - url: $${__value.raw} - -# Tempo for traces -- name: Traces - type: tempo - access: proxy - uid: traces - url: http://nginx.gateway.svc.cluster.local:3200 - basicAuth: false - isDefault: false - version: 1 - editable: true - apiVersion: 1 - jsonData: - search: - hide: false - lokiSearch: - datasourceUid: logs - nodeGraph: - enabled: true - serviceMap: - datasourceUid: metrics - traceQuery: - timeShiftEnabled: true - spanStartTimeShift: '-30m' - spanEndTimeShift: '30m' - spanBar: - type: 'Tag' - tag: 'http.path' - tracesToMetrics: - datasourceUid: metrics - spanStartTimeShift: '-30m' - spanEndTimeShift: '30m' - tags: [{ key: 'service.name', value: 'service' }] - queries: - - name: '(R) Rate' - query: 'sum(rate(traces_spanmetrics_calls_total{$$__tags}[$$__rate_interval]))' - - name: '(E) Error Rate' - query: 'sum(rate(traces_spanmetrics_calls_total{$$__tags, status_code="STATUS_CODE_ERROR"}[$$__rate_interval]))' - - name: '(D) Duration' - query: 'histogram_quantile(0.9, sum(rate(traces_spanmetrics_latency_bucket{$$__tags}[$$__rate_interval])) by (le))' - tracesToLogsV2: - datasourceUid: logs - spanStartTimeShift: '-30m' - spanEndTimeShift: '30m' - tags: [{ key: 'app', value: 'app' }] - filterByTraceID: false - filterBySpanID: false - tracesToProfiles: - customQuery: false - datasourceUid: "profiles" - profileTypeId: "process_cpu:cpu:nanoseconds:cpu:nanoseconds" - tags: [{ key: 'app', value: 'service_name' }] - -# Pyroscope for profiles -- name: Profiles - type: grafana-pyroscope-datasource - uid: profiles - access: proxy - url: http://nginx.gateway.svc.cluster.local:4040 - basicAuth: false - isDefault: false - version: 1 - editable: true \ No newline at end of file diff --git a/kubernetes/monolithic-mode/all-in-one/k8s-all-in-one.yaml 
b/kubernetes/monolithic-mode/all-in-one/k8s-all-in-one.yaml index 303cd5cf..0d68b495 100644 --- a/kubernetes/monolithic-mode/all-in-one/k8s-all-in-one.yaml +++ b/kubernetes/monolithic-mode/all-in-one/k8s-all-in-one.yaml @@ -28,6 +28,18 @@ metadata: --- apiVersion: v1 kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +--- +apiVersion: v1 +kind: ServiceAccount metadata: labels: app.kubernetes.io/component: mimir @@ -91,6 +103,108 @@ rules: --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent +rules: +- apiGroups: + - "" + - discovery.k8s.io + - networking.k8s.io + resources: + - endpoints + - endpointslices + - ingresses + - nodes + - nodes/proxy + - nodes/metrics + - pods + - services + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - pods + - pods/log + - namespaces + verbs: + - get + - list + - watch +- apiGroups: + - monitoring.grafana.com + resources: + - podlogs + verbs: + - get + - list + - watch +- apiGroups: + - monitoring.coreos.com + resources: + - prometheusrules + verbs: + - get + - list + - watch +- nonResourceURLs: + - /metrics + verbs: + - get +- apiGroups: + - monitoring.coreos.com + resources: + - podmonitors + - servicemonitors + - probes + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - events + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - configmaps + - secrets + verbs: + - get + - list + - watch +- apiGroups: + - apps + resources: + - replicasets + verbs: + - get + - list + - watch +- 
apiGroups: + - extensions + resources: + - replicasets + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole metadata: labels: app.kubernetes.io/instance: loki @@ -132,6 +246,25 @@ subjects: --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: grafana-agent +subjects: +- kind: ServiceAccount + name: grafana-agent + namespace: monitoring-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding metadata: labels: app.kubernetes.io/instance: loki @@ -440,865 +573,15 @@ metadata: namespace: logging-system --- apiVersion: v1 -data: - agent-cluster-node.json: |- - { - "annotations": { - "list": [ - { - "datasource": "$loki_datasource", - "enable": true, - "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", - "iconColor": "rgba(0, 211, 255, 1)", - "instant": false, - "name": "Deployments", - "titleFormat": "{{cluster}}/{{namespace}}" - } - ] - }, - "graphTooltip": 1, - "links": [ - { - "icon": "doc", - "targetBlank": true, - "title": "Documentation", - "tooltip": "Clustering documentation", - "type": "link", - "url": "https://grafana.com/docs/agent/latest/flow/reference/cli/run/#clustered-mode-experimental" - }, - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "title": "Node Info", - "type": "row" - 
}, - { - "datasource": "${datasource}", - "description": "Information about a specific cluster node.\n\n* Lamport clock time: The observed Lamport time on the specific node's clock used to provide partial ordering around gossip messages. Nodes should ideally be observing roughly the same time, meaning they are up-to-date on the cluster state. If a node is falling behind, it means that it has not recently processed the same number of messages and may have an outdated view of its peers.\n\n* Internal cluster state observers: The number of Observer functions that are registered to run whenever the node detects a cluster change.\n\n* Gossip health score: A health score assigned to this node by the memberlist implementation. The lower, the better.\n\n* Gossip protocol version: The protocol version used by nodes to communicate with one another. It should match across all nodes.\n", - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 1 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(cluster_node_lamport_time{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "Lamport clock time" - }, - { - "datasource": "${datasource}", - "expr": "sum(cluster_node_update_observers{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "Internal cluster state observers" - }, - { - "datasource": "${datasource}", - "expr": "sum(cluster_node_gossip_health_score{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "Gossip health score" - }, - { - "datasource": "${datasource}", - "expr": "sum(cluster_node_gossip_proto_version{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", - "format": "table", 
- "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "Gossip protocol version" - } - ], - "title": "Node Info", - "transformations": [ - { - "id": "renameByRegex", - "options": { - "regex": "Value #(.*)", - "renamePattern": "$1" - } - }, - { - "id": "reduce", - "options": { } - }, - { - "id": "organize", - "options": { - "excludeByName": { }, - "indexByName": { }, - "renameByName": { - "Field": "Metric", - "Max": "Value" - } - } - } - ], - "type": "table" - }, - { - "datasource": "${datasource}", - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 1 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(cluster_node_gossip_received_events_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", - "instant": false, - "legendFormat": "{{event}}", - "range": true - } - ], - "title": "Gossip ops/s", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Known peers to the node (including the local node).\n", - "fieldConfig": { - "defaults": { - "unit": "suffix:peers" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 9 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(cluster_node_peers{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", - "instant": false, - "legendFormat": "__auto", - "range": true - } - ], - "title": "Known peers", - "type": "stat" - }, - { - "datasource": "${datasource}", - "description": "Known peers to the node by state (including the local node).\n", - "fieldConfig": { - "defaults": { - "unit": "suffix:nodes" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 9 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "cluster_node_peers{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}", - "instant": false, - "legendFormat": "{{state}}", - "range": true - } - ], - "title": "Peers by state", - "type": "timeseries" - }, - { - 
"datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 17 - }, - "title": "Gossip Transport", - "type": "row" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "axisCenteredZero": true - }, - "unit": "Bps" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 18 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(cluster_transport_rx_bytes_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", - "instant": false, - "legendFormat": "rx", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "-1 * rate(cluster_transport_tx_bytes_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", - "instant": false, - "legendFormat": "tx", - "range": true - } - ], - "title": "Transport bandwidth", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "unit": "percentunit" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 18 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "1 - (\nrate(cluster_transport_tx_packets_failed_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) /\nrate(cluster_transport_tx_packets_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n)\n", - "instant": false, - "legendFormat": "Tx success %", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "1 - (\n rate(cluster_transport_rx_packets_failed_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) /\n rate(cluster_transport_rx_packets_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n )\n", - "instant": false, - "legendFormat": "Rx success %", - "range": true - } - ], - "title": "Packet write success rate", - "type": "timeseries" - }, - { - 
"datasource": "${datasource}", - "description": "The number of packets enqueued currently to be decoded or encoded and sent during communication with other nodes.\n\nThe incoming and outgoing packet queue should be as empty as possible; a growing queue means that the Agent cannot keep up with the number of messages required to have all nodes informed of cluster changes, and the nodes may not converge in a timely fashion.\n", - "fieldConfig": { - "defaults": { - "unit": "pkts" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 18 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "cluster_transport_tx_packet_queue_length{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}", - "instant": false, - "legendFormat": "tx queue", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "cluster_transport_rx_packet_queue_length{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}", - "instant": false, - "legendFormat": "rx queue", - "range": true - } - ], - "title": "Pending packet queue", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "axisCenteredZero": true - }, - "unit": "Bps" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 26 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(cluster_transport_stream_rx_bytes_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", - "instant": false, - "legendFormat": "rx", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "-1 * rate(cluster_transport_stream_tx_bytes_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", - "instant": false, - "legendFormat": "tx", - "range": true - } - ], - "title": "Stream bandwidth", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "unit": "percentunit" - } - }, - 
"gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 26 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "1 - (\n rate(cluster_transport_stream_tx_packets_failed_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) /\n rate(cluster_transport_stream_tx_packets_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n )\n", - "instant": false, - "legendFormat": "Tx success %", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "1 - (\n rate(cluster_transport_stream_rx_packets_failed_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) /\n rate(cluster_transport_stream_rx_packets_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n )\n", - "instant": false, - "legendFormat": "Rx success %", - "range": true - } - ], - "title": "Stream write success rate", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "The number of open connections from this node to its peers.\n\nEach node picks up a subset of its peers to continuously gossip messages around cluster status using streaming HTTP/2 connections. 
This panel can be used to detect networking failures that result in cluster communication being disrupted and convergence taking longer than expected or outright failing.\n", - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 26 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "cluster_transport_streams{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}", - "instant": false, - "legendFormat": "Open streams", - "range": true - } - ], - "title": "Open transport streams", - "type": "timeseries" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": "cluster", - "query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": "namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "instance", - "name": "instance", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n", - "refId": "instance" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": 
[ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - "timezone": "utc", - "title": "Grafana Agent Flow / Cluster Node", - "uid": "dd370cd333b2d9258435fb1b5a20a89b" - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin - labels: - grafana_dashboard: "1" - name: agent-cluster-node.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - agent-cluster-overview.json: |- - { - "annotations": { - "list": [ - { - "datasource": "$loki_datasource", - "enable": true, - "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", - "iconColor": "rgba(0, 211, 255, 1)", - "instant": false, - "name": "Deployments", - "titleFormat": "{{cluster}}/{{namespace}}" - } - ] - }, - "graphTooltip": 1, - "links": [ - { - "icon": "doc", - "targetBlank": true, - "title": "Documentation", - "tooltip": "Clustering documentation", - "type": "link", - "url": "https://grafana.com/docs/agent/latest/flow/reference/cli/run/#clustered-mode-experimental" - }, - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "${datasource}", - "gridPos": { - "h": 9, - "w": 8, - "x": 0, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "count(cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"})", - "instant": true, - "legendFormat": "__auto", - "range": false - } - ], - "title": "Nodes", - "type": "stat" - }, - { - "datasource": "${datasource}", - "description": "Nodes info.\n", - "fieldConfig": { - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Dashboard" - }, - "properties": [ - { - "id": "mappings", - "value": [ - { - "options": { - "1": { - "index": 0, - "text": 
"Link" - } - }, - "type": "value" - } - ] - }, - { - "id": "links", - "value": [ - { - "targetBlank": false, - "title": "Detail dashboard for node", - "url": "/d/dd370cd333b2d9258435fb1b5a20a89b/grafana-agent-flow-cluster-node?var-instance=${__data.fields.instance}&var-datasource=${datasource}&var-loki_datasource=${loki_datasource}&var-cluster=${cluster}&var-namespace=${namespace}" - } - ] - } - ] - } - ] - }, - "gridPos": { - "h": 9, - "w": 16, - "x": 8, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"}", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false - } - ], - "title": "Node table", - "transformations": [ - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true, - "Value": false, - "__name__": true, - "cluster": true, - "namespace": true, - "state": false - }, - "indexByName": { }, - "renameByName": { - "Value": "Dashboard", - "instance": "", - "state": "" - } - } - } - ], - "type": "table" - }, - { - "datasource": "${datasource}", - "description": "Whether the cluster state has converged.\n\nIt is normal for the cluster state to be diverged briefly as gossip events propagate. 
It is not normal for the cluster state to be diverged for a long period of time.\n\nThis will show one of the following:\n\n* Converged: Nodes are aware of all other nodes, with the correct states.\n* Not converged: A subset of nodes aren't aware of their peers, or don't have an updated view of peer states.\n", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "1": { - "color": "red", - "index": 1, - "text": "Not converged" - } - }, - "type": "value" - }, - { - "options": { - "match": "null", - "result": { - "color": "green", - "index": 0, - "text": "Converged" - } - }, - "type": "special" - } - ], - "unit": "suffix:nodes" - } - }, - "gridPos": { - "h": 9, - "w": 8, - "x": 0, - "y": 9 - }, - "options": { - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "clamp((\n sum(stddev by (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"}) != 0) or\n (sum(abs(sum without (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"})) - scalar(count(cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"})) != 0))\n ),\n 1, 1\n)\n", - "format": "time_series", - "instant": true, - "legendFormat": "__auto", - "range": false - } - ], - "title": "Convergance state", - "type": "stat" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 80, - "spanNulls": true - }, - "mappings": [ - { - "options": { - "0": { - "color": "green", - "text": "Yes" - } - }, - "type": "value" - }, - { - "options": { - "1": { - "color": "red", - "text": "No" - } - }, - "type": "value" - } - ], - "max": 1, - "noValue": 0 - } - }, - "gridPos": { - "h": 9, - "w": 16, - "x": 8, - "y": 9 - }, - "options": { - "mergeValues": true - }, - "targets": [ - { - 
"datasource": "${datasource}", - "expr": "ceil(clamp((\n sum(stddev by (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"})) or\n (sum(abs(sum without (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"})) - scalar(count(cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"}))))\n ),\n 0, 1\n))\n", - "instant": false, - "legendFormat": "Converged", - "range": true - } - ], - "title": "Convergance state timeline", - "type": "state-timeline" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": "cluster", - "query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": "namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - "timezone": "utc", - "title": "Grafana Agent Flow / Cluster Overview", - "uid": "7e07f9c975fcfc2a6e120a95f579f843" - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin - labels: - 
grafana_dashboard: "1" - name: agent-cluster-overview.json - namespace: monitoring-system ---- -apiVersion: v1 data: config.river: "/*\nThe following example shows using the default all logs processing module, for\na single tenant and specifying the destination url/credentials via environment\nvariables.\n*/\nlogging {\n\tlevel = coalesce(env(\"AGENT_LOG_LEVEL\"), - \"info\")\n\tformat = \"logfmt\"\n}\n\nmodule.file \"lgtmp\" {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), - \"/etc/agent-modules\") + \"/lgtmp.river\"\n\n\targuments {\n\t\tcluster = - coalesce(env(\"CLUSTER\"), \"k3d-k3s-codelab\")\n\t\tlogs_endpoint = coalesce(env(\"LOGS_ENDPOINT\"), + \"info\")\n\tformat = \"logfmt\"\n}\n\n/********************************************\n + * Grafana LGTMP Stack Receiver Provider\n ********************************************/\nmodule.file + \"lgtmp\" {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), \"/etc/agent-modules\") + + \"/lgtmp.river\"\n\n\targuments {\n\t\tcluster = coalesce(env(\"CLUSTER\"), + \"k3d-k3s-codelab\")\n\t\tlogs_endpoint = coalesce(env(\"LOGS_ENDPOINT\"), \"http://nginx.gateway.svc:3100\")\n\t\tmetrics_endpoint = coalesce(env(\"METRICS_ENDPOINT\"), \"http://nginx.gateway.svc:8080\")\n\t\tprofiles_endpoint = coalesce(env(\"PROFILES_ENDPOINT\"), \"http://nginx.gateway.svc:4040\")\n\t\ttraces_endpoint = coalesce(env(\"TRACES_ENDPOINT\"), @@ -1327,28194 +610,485 @@ data: \ = \"monitoring-system\"\n\t\tforward_to = [module.file.lgtmp.exports.metrics_receiver]\n\t}\n}\n" kind: ConfigMap metadata: - name: agent-config - namespace: monitoring-system ---- -apiVersion: v1 -data: - agent-flow-controller.json: |- - { - "annotations": { - "list": [ - { - "datasource": "$loki_datasource", - "enable": true, - "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", - "iconColor": "rgba(0, 211, 255, 1)", - "instant": false, - "name": "Deployments", - 
"titleFormat": "{{cluster}}/{{namespace}}" - } - ] - }, - "graphTooltip": 1, - "links": [ - { - "icon": "doc", - "targetBlank": true, - "title": "Documentation", - "tooltip": "Component controller documentation", - "type": "link", - "url": "https://grafana.com/docs/agent/latest/flow/concepts/component_controller/" - }, - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "${datasource}", - "description": "The number of Grafana Agent Flow instances whose metrics are being sent and reported.\n", - "fieldConfig": { - "defaults": { - "unit": "agents" - } - }, - "gridPos": { - "h": 4, - "w": 10, - "x": 0, - "y": 0 - }, - "options": { - "colorMode": "none", - "graphMode": "none" - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "count(agent_component_controller_evaluating{cluster=\"$cluster\", namespace=\"$namespace\"})", - "instant": false, - "legendFormat": "__auto", - "range": true - } - ], - "title": "Running agents", - "type": "stat" - }, - { - "datasource": "${datasource}", - "description": "The number of running components across all running agents.\n", - "fieldConfig": { - "defaults": { - "unit": "components" - } - }, - "gridPos": { - "h": 4, - "w": 10, - "x": 0, - "y": 4 - }, - "options": { - "colorMode": "none", - "graphMode": "none" - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"})", - "instant": false, - "legendFormat": "__auto", - "range": true - } - ], - "title": "Running components", - "type": "stat" - }, - { - "datasource": "${datasource}", - "description": "The percentage of components which are in a healthy state.\n", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "No components", - "unit": 
"percentunit" - } - }, - "gridPos": { - "h": 4, - "w": 10, - "x": 0, - "y": 8 - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "text": { - "valueSize": 80 - } - }, - "pluginVersion": "9.0.6", - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\",health_type=\"healthy\"}) /\nsum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"})\n", - "instant": false, - "legendFormat": "__auto", - "range": true - } - ], - "title": "Overall component health", - "type": "stat" - }, - { - "datasource": "${datasource}", - "description": "Breakdown of components by health across all running agents.\n\n* Healthy: components have been evaluated completely and are reporting themselves as healthy.\n* Unhealthy: Components either could not be evaluated or are reporting themselves as unhealthy.\n* Unknown: A component has been created but has not yet been started.\n* Exited: A component has exited. It will not return to the running state.\n\nMore information on a component's health state can be retrieved using\nthe Grafana Agent Flow UI.\n\nNote that components may be in a degraded state even if they report\nthemselves as healthy. 
Use component-specific dashboards and alerts\nto observe detailed information about the behavior of a component.\n", - "fieldConfig": { - "defaults": { - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Unhealthy" - }, - "properties": [ - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 1 - } - ] - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Unknown" - }, - "properties": [ - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "blue", - "value": 1 - } - ] - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Exited" - }, - "properties": [ - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 1 - } - ] - } - } - ] - } - ] - }, - "gridPos": { - "h": 12, - "w": 14, - "x": 10, - "y": 0 - }, - "options": { - "orientation": "vertical", - "showUnfilled": true - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"healthy\"}) or vector(0)", - "instant": true, - "legendFormat": "Healthy", - "range": false - }, - { - "datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"unhealthy\"}) or vector(0)", - "instant": true, - "legendFormat": "Unhealthy", - "range": false - }, - { - "datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"unknown\"}) or vector(0)", - "instant": true, - "legendFormat": "Unknown", - 
"range": false - }, - { - "datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"exited\"}) or vector(0)", - "instant": true, - "legendFormat": "Exited", - "range": false - } - ], - "title": "Components by health", - "type": "bargauge" - }, - { - "datasource": "${datasource}", - "description": "The frequency at which components get updated.\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "points", - "pointSize": 3 - }, - "unit": "ops" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 12 - }, - "options": { - "tooltip": { - "mode": "multi" - } - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (instance) (rate(agent_component_evaluation_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", - "instant": false, - "legendFormat": "__auto", - "range": true - } - ], - "title": "Component evaluation rate", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "The percentiles for how long it takes to complete component evaluations.\n\nComponent evaluations must complete for components to have the latest\narguments. 
The longer the evaluations take, the slower it will be to\nreconcile the state of components.\n\nIf evaluation is taking too long, consider sharding your components to\ndeal with smaller amounts of data and reuse data as much as possible.\n", - "fieldConfig": { - "defaults": { - "unit": "s" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "histogram_quantile(0.99, sum(rate(agent_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\nor\nhistogram_quantile(0.99, sum by (le) (rate(agent_component_evaluation_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\n", - "instant": false, - "legendFormat": "99th percentile", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "histogram_quantile(0.50, sum(rate(agent_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\nor\nhistogram_quantile(0.50, sum by (le) (rate(agent_component_evaluation_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\n", - "instant": false, - "legendFormat": "50th percentile", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "(\n histogram_sum(sum(rate(agent_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))) /\n histogram_count(sum(rate(agent_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\n)\nor\n(\n sum(rate(agent_component_evaluation_seconds_sum{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])) /\n sum(rate(agent_component_evaluation_seconds_count{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n)\n", - "instant": false, - "legendFormat": "Average", - "range": true - } - ], - "title": "Component evaluation time", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "The 
percentage of time spent evaluating 'slow' components - components that took longer than 1 minute to evaluate.\n\nIdeally, no component should take more than 1 minute to evaluate. The components displayed in this chart\nmay be a sign of a problem with the pipeline.\n", - "fieldConfig": { - "defaults": { - "unit": "percentunit" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (component_id) (rate(agent_component_evaluation_slow_seconds{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n/ scalar(sum(rate(agent_component_evaluation_seconds_sum{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))\n", - "instant": false, - "legendFormat": "{{component_id}}", - "range": true - } - ], - "title": "Slow components evaluation times", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Detailed histogram view of how long component evaluations take.\n\nThe goal is to design your config so that evaluations take as little\ntime as possible; under 100ms is a good goal.\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 22 - }, - "maxDataPoints": 30, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "scheme": "Spectral" - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 0.10000000000000001 - }, - "tooltip": { - "show": true, - "yHistogram": true - }, - "yAxis": { - "unit": "s" - } - }, - "pluginVersion": "9.0.6", - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(increase(agent_component_evaluation_seconds{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\nor ignoring (le)\nsum by (le) (increase(agent_component_evaluation_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n", - "format": "heatmap", - "instant": false, - "legendFormat": "{{le}}", - "range": true - } - ], - "title": "Component evaluation 
histogram", - "type": "heatmap" - }, - { - "datasource": "${datasource}", - "description": "Detailed histogram of how long components wait to be evaluated after their dependency is updated.\n\nThe goal is to design your config so that most of the time components do not\nqueue for long; under 10ms is a good goal.\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 22 - }, - "maxDataPoints": 30, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "scheme": "Spectral" - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 0.10000000000000001 - }, - "tooltip": { - "show": true, - "yHistogram": true - }, - "yAxis": { - "unit": "s" - } - }, - "pluginVersion": "9.0.6", - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(increase(agent_component_dependencies_wait_seconds{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\nor ignoring (le)\nsum by (le) (increase(agent_component_dependencies_wait_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n", - "format": "heatmap", - "instant": false, - "legendFormat": "{{le}}", - "range": true - } - ], - "title": "Component dependency wait histogram", - "type": "heatmap" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": "cluster", - "query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - 
"query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": "namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - "timezone": "utc", - "title": "Grafana Agent Flow / Controller", - "uid": "f861e5fef2e795edd5c4c73bee1ba769" - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin - labels: - grafana_dashboard: "1" - name: agent-flow-controller.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - agent-flow-opentelemetry.json: |- - { - "graphTooltip": 1, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "title": "Receivers for traces [otelcol.receiver]", - "type": "row" - }, - { - "datasource": "${datasource}", - "description": "Number of spans successfully pushed into the pipeline.\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(receiver_accepted_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{ pod }} / {{ transport }}", - "range": true - } - ], - "title": "Accepted spans", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Number of spans that could not be pushed into the pipeline.\n", - "fieldConfig": { - "defaults": { - 
"custom": { - "fillOpacity": 20, - "gradientMode": "hue", - "stacking": { - "mode": "normal" - } - } - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(receiver_refused_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{ pod }} / {{ transport }}", - "range": true - } - ], - "title": "Refused spans", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "The duration of inbound RPCs.\n", - "fieldConfig": { - "defaults": { - "unit": "milliseconds" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 0 - }, - "maxDataPoints": 30, - "options": { - "calculate": false, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "scale": "exponential", - "scheme": "Oranges", - "steps": 65 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1.0000000000000001e-09 - }, - "tooltip": { - "show": true, - "yHistogram": true - }, - "yAxis": { - "unit": "s" - } - }, - "pluginVersion": "9.0.6", - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (le) (increase(rpc_server_duration_milliseconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", rpc_service=\"opentelemetry.proto.collector.trace.v1.TraceService\"}[$__rate_interval]))", - "format": "heatmap", - "instant": false, - "legendFormat": "{{le}}", - "range": true - } - ], - "title": "RPC server duration (traces)", - "type": "heatmap" - }, - { - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 10 - }, - "title": "Batching [otelcol.processor.batch]", - "type": "row" - }, - { - "datasource": "${datasource}", - "description": "Number of units in the batch\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 10 - }, - "maxDataPoints": 30, - "options": { - "calculate": 
false, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "scale": "exponential", - "scheme": "Oranges", - "steps": 65 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1.0000000000000001e-09 - }, - "tooltip": { - "show": true, - "yHistogram": true - }, - "yAxis": { - "unit": "s" - } - }, - "pluginVersion": "9.0.6", - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (le) (increase(processor_batch_batch_send_size_ratio_bucket{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval]))", - "format": "heatmap", - "instant": false, - "legendFormat": "{{le}}", - "range": true - } - ], - "title": "Number of units in the batch", - "type": "heatmap" - }, - { - "datasource": "${datasource}", - "description": "Number of distinct metadata value combinations being processed\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 10 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "processor_batch_metadata_cardinality_ratio{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}\n", - "instant": false, - "legendFormat": "{{ pod }}", - "range": true - } - ], - "title": "Distinct metadata values", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Number of times the batch was sent due to a timeout trigger\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 10 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(processor_batch_timeout_trigger_send_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{ pod }}", - "range": true - } - ], - "title": "Timeout trigger", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 20 - }, - "title": "Exporters for traces [otelcol.exporter]", - "type": "row" - }, - { - 
"datasource": "${datasource}", - "description": "Number of spans successfully sent to destination.\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 20 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(exporter_sent_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{ pod }}", - "range": true - } - ], - "title": "Exported sent spans", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Number of spans in failed attempts to send to destination.\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 20 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(exporter_send_failed_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{ pod }}", - "range": true - } - ], - "title": "Exported failed spans", - "type": "timeseries" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": "cluster", - "query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": "namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "allValue": ".*", - 
"datasource": "${datasource}", - "includeAll": true, - "label": "instance", - "multi": true, - "name": "instance", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n", - "refId": "instance" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - "timezone": "utc", - "title": "Grafana Agent Flow / OpenTelemetry", - "uid": "c90e752eb8c0fce588f906b7279aceea" - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin - labels: - grafana_dashboard: "1" - name: agent-flow-opentelemetry.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - agent-flow-prometheus-remote-write.json: |- - { - "annotations": { - "list": [ - { - "datasource": "$loki_datasource", - "enable": true, - "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", - "iconColor": "rgba(0, 211, 255, 1)", - "instant": false, - "name": "Deployments", - "titleFormat": "{{cluster}}/{{namespace}}" - } - ] - }, - "graphTooltip": 1, - "links": [ - { - "icon": "doc", - "targetBlank": true, - "title": "Documentation", - "tooltip": "Component documentation", - "type": "link", - "url": "https://grafana.com/docs/agent/latest/flow/reference/components/prometheus.remote_write/" - }, - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "collapsed": false, - "datasource": "${datasource}", - 
"gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "title": "prometheus.scrape", - "type": "row" - }, - { - "datasource": "${datasource}", - "description": "Percentage of targets successfully scraped by prometheus.scrape\ncomponents.\n\nThis metric is calculated by dividing the number of targets\nsuccessfully scraped by the total number of targets scraped,\nacross all the namespaces in the selected cluster.\n\nLow success rates can indicate a problem with scrape targets,\nstale service discovery, or agent misconfiguration.\n", - "fieldConfig": { - "defaults": { - "unit": "percentunit" - } - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 1 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(up{cluster=\"$cluster\"})\n/\ncount (up{cluster=\"$cluster\"})\n", - "instant": false, - "legendFormat": "% of targets successfully scraped", - "range": true - } - ], - "title": "Scrape success rate in $cluster", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Duration of successful scrapes by prometheus.scrape components,\nacross all the namespaces in the selected cluster.\n\nThis metric should be below your configured scrape interval.\nHigh durations can indicate a problem with a scrape target or\na performance issue with the agent.\n", - "fieldConfig": { - "defaults": { - "unit": "s" - } - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 1 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "quantile(0.99, scrape_duration_seconds{cluster=\"$cluster\"})\n", - "instant": false, - "legendFormat": "p99", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "quantile(0.95, scrape_duration_seconds{cluster=\"$cluster\"})\n", - "instant": false, - "legendFormat": "p95", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "quantile(0.50, scrape_duration_seconds{cluster=\"$cluster\"})\n", - "instant": false, - "legendFormat": "p50", - "range": true - } - ], - 
"title": "Scrape duration in $cluster", - "type": "timeseries" - }, - { - "collapsed": false, - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 11 - }, - "title": "prometheus.remote_write", - "type": "row" - }, - { - "datasource": "${datasource}", - "description": "How far behind prometheus.remote_write from samples recently written\nto the WAL.\n\nEach endpoint prometheus.remote_write is configured to send metrics\nhas its own delay. The time shown here is the sum across all\nendpoints for the given component.\n\nIt is normal for the WAL delay to be within 1-3 scrape intervals. If\nthe WAL delay continues to increase beyond that amount, try\nincreasing the number of maximum shards.\n", - "fieldConfig": { - "defaults": { - "unit": "s" - } - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 0, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (instance, component_id) (\n prometheus_remote_storage_highest_timestamp_in_seconds{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\"}\n - ignoring(url, remote_name) group_right(instance)\n prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "WAL delay", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate of data containing samples and metadata sent by\nprometheus.remote_write.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 20, - "gradientMode": "hue", - "stacking": { - "mode": "normal" - } - }, - "unit": "Bps" - } - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 6, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum without (remote_name, url) (\n 
rate(prometheus_remote_storage_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval]) +\n rate(prometheus_remote_storage_metadata_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "Data write throughput", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Latency of writes to the remote system made by\nprometheus.remote_write.\n", - "fieldConfig": { - "defaults": { - "unit": "s" - } - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 12, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "histogram_quantile(0.99, sum by (le) (\n rate(prometheus_remote_storage_sent_batch_duration_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n))\n", - "instant": false, - "legendFormat": "99th percentile", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "histogram_quantile(0.50, sum by (le) (\n rate(prometheus_remote_storage_sent_batch_duration_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n))\n", - "instant": false, - "legendFormat": "50th percentile", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "sum(rate(prometheus_remote_storage_sent_batch_duration_seconds_sum{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\", component_id=~\"$component\"}[$__rate_interval])) /\nsum(rate(prometheus_remote_storage_sent_batch_duration_seconds_count{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\", component_id=~\"$component\"}[$__rate_interval]))\n", - 
"instant": false, - "legendFormat": "Average", - "range": true - } - ], - "title": "Write latency", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Total number of shards which are concurrently sending samples read\nfrom the Write-Ahead Log.\n\nShards are bound to a minimum and maximum, displayed on the graph.\nThe lowest minimum and the highest maximum across all clients is\nshown.\n\nEach client has its own set of shards, minimum shards, and maximum\nshards; filter to a specific URL to display more granular\ninformation.\n", - "fieldConfig": { - "defaults": { - "unit": "none" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Minimum" - }, - "properties": [ - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 15 - ], - "fill": "dash" - } - }, - { - "id": "custom.showPoints", - "value": "never" - }, - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": false, - "viz": false - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Maximum" - }, - "properties": [ - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 15 - ], - "fill": "dash" - } - }, - { - "id": "custom.showPoints", - "value": "never" - }, - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": false, - "viz": false - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 18, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum without (remote_name, url) (\n prometheus_remote_storage_shards{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "min (\n prometheus_remote_storage_shards_min{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}\n)\n", - "instant": 
false, - "legendFormat": "Minimum", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "max (\n prometheus_remote_storage_shards_max{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}\n)\n", - "instant": false, - "legendFormat": "Maximum", - "range": true - } - ], - "title": "Shards", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Total outgoing samples sent by prometheus.remote_write.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 20, - "gradientMode": "hue", - "stacking": { - "mode": "normal" - } - }, - "unit": "cps" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 22 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum without (url, remote_name) (\n rate(prometheus_remote_storage_samples_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "Sent samples / second", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate of samples which prometheus.remote_write could not send due to\nnon-recoverable errors.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 20, - "gradientMode": "hue", - "stacking": { - "mode": "normal" - } - }, - "unit": "cps" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 22 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum without (url,remote_name) (\n rate(prometheus_remote_storage_samples_failed_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "Failed samples / second", - 
"type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate of samples which prometheus.remote_write attempted to resend\nafter receiving a recoverable error.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 20, - "gradientMode": "hue", - "stacking": { - "mode": "normal" - } - }, - "unit": "cps" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 22 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum without (url,remote_name) (\n rate(prometheus_remote_storage_samples_retried_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "Retried samples / second", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Total number of active series across all components.\n\nAn \"active series\" is a series that prometheus.remote_write recently\nreceived a sample for. 
Active series are garbage collected whenever a\ntruncation of the WAL occurs.\n", - "fieldConfig": { - "defaults": { - "unit": "short" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 32 - }, - "options": { - "legend": { - "showLegend": false - } - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(agent_wal_storage_active_series{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"})\n", - "instant": false, - "legendFormat": "Series", - "range": true - } - ], - "title": "Active series (total)", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Total number of active series which are currently being tracked by\nprometheus.remote_write components, with separate lines for each agent instance.\n\nAn \"active series\" is a series that prometheus.remote_write recently\nreceived a sample for. Active series are garbage collected whenever a\ntruncation of the WAL occurs.\n", - "fieldConfig": { - "defaults": { - "unit": "short" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 32 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "agent_wal_storage_active_series{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id!=\"\", component_id=~\"$component\", url=~\"$url\"}\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "Active series (by instance/component)", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Total number of active series which are currently being tracked by\nprometheus.remote_write components, aggregated across all instances.\n\nAn \"active series\" is a series that prometheus.remote_write recently\nreceived a sample for. 
Active series are garbage collected whenever a\ntruncation of the WAL occurs.\n", - "fieldConfig": { - "defaults": { - "unit": "short" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 32 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (component_id) (agent_wal_storage_active_series{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id!=\"\", component_id=~\"$component\", url=~\"$url\"})\n", - "instant": false, - "legendFormat": "{{component_id}}", - "range": true - } - ], - "title": "Active series (by component)", - "type": "timeseries" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": "cluster", - "query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": "namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "allValue": ".*", - "datasource": "${datasource}", - "includeAll": true, - "label": "instance", - "multi": true, - "name": "instance", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n", - "refId": "instance" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "allValue": ".*", - "datasource": "${datasource}", - 
"includeAll": true, - "label": "component", - "multi": true, - "name": "component", - "query": { - "query": "label_values(agent_wal_samples_appended_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"prometheus\\\\.remote_write\\\\..*\"}, component_id)\n", - "refId": "component" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "allValue": ".*", - "datasource": "${datasource}", - "includeAll": true, - "label": "url", - "multi": true, - "name": "url", - "query": { - "query": "label_values(prometheus_remote_storage_sent_batch_duration_seconds_sum{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\"}, url)\n", - "refId": "url" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - "timezone": "utc", - "title": "Grafana Agent Flow / Prometheus Components", - "uid": "ee34ffa2d084547d650e1d96a26306aa" - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin - labels: - grafana_dashboard: "1" - name: agent-flow-prometheus-remote-write.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - agent-flow-resources.json: |- - { - "annotations": { - "list": [ - { - "datasource": "$loki_datasource", - "enable": true, - "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", - "iconColor": "rgba(0, 211, 255, 1)", - "instant": false, - "name": "Deployments", - "titleFormat": "{{cluster}}/{{namespace}}" - } - ] - }, - "graphTooltip": 1, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - 
"keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "${datasource}", - "description": "CPU usage of the Grafana Agent process relative to 1 CPU core.\n\nFor example, 100% means using one entire CPU core.\n", - "fieldConfig": { - "defaults": { - "unit": "percentunit" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(agent_resources_process_cpu_seconds_total{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[$__rate_interval])", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "CPU usage", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Resident memory size of the Grafana Agent process.\n", - "fieldConfig": { - "defaults": { - "unit": "decbytes" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "agent_resources_process_resident_memory_bytes{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "Memory (RSS)", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate at which the Grafana Agent process performs garbage collections.\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "points", - "pointSize": 3 - }, - "unit": "ops" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 8 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(go_gc_duration_seconds_count{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[5m])\nand on(instance)\nagent_build_info{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\n", - "instant": false, - "legendFormat": "{{instance}}", - 
"range": true - } - ], - "title": "Garbage collections", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Number of goroutines which are running in parallel. An infinitely\ngrowing number of these indicates a goroutine leak.\n", - "fieldConfig": { - "defaults": { - "unit": "none" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 8 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "go_goroutines{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\nand on(instance)\nagent_build_info{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\n", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "Goroutines", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Heap memory currently in use by the Grafana Agent process.\n", - "fieldConfig": { - "defaults": { - "unit": "decbytes" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 8 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\nand on(instance)\nagent_build_info{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\n", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "Memory (heap inuse)", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate of data received across all network interfaces for the machine\nGrafana Agent is running on.\n\nData shown here is across all running processes and not exclusive to\nthe running Grafana Agent process.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 30, - "gradientMode": "none", - "stacking": { - "mode": "normal" - } - }, - "unit": "Bps" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 16 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": 
"rate(agent_resources_machine_rx_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "Network receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate of data sent across all network interfaces for the machine\nGrafana Agent is running on.\n\nData shown here is across all running processes and not exclusive to\nthe running Grafana Agent process.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 30, - "gradientMode": "none", - "stacking": { - "mode": "normal" - } - }, - "unit": "Bps" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 16 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(agent_resources_machine_tx_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "Network send bandwidth", - "type": "timeseries" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": "cluster", - "query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": 
"namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "allValue": ".*", - "datasource": "${datasource}", - "includeAll": true, - "label": "instance", - "multi": true, - "name": "instance", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n", - "refId": "instance" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - "timezone": "utc", - "title": "Grafana Agent Flow / Resources", - "uid": "d47aae5c53be5550f8e3bc8a904ba61a" - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin - labels: - grafana_dashboard: "1" - name: agent-flow-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - go-runtime.json: |- - { - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "description": "Go runtime metrics", - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "id": 14, - "iteration": 1623758038990, - "links": [ ], - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average total bytes of memory reserved across all process instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 0 - }, - "hiddenSeries": false, - "id": 16, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": 
false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by(job)(go_memstats_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}} (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Total Reserved Memory", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average stack memory usage across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 0 - }, - "hiddenSeries": false, - "id": 24, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - 
"spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job) (go_memstats_stack_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: stack inuse (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Stack Memory Use", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average memory reservations by the runtime, not for stack or heap, across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 8 - }, - "hiddenSeries": false, - "id": 26, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(go_memstats_mspan_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{instance}}: mspan (avg)", - "refId": "B" - }, - { - "expr": "avg 
by (job)(go_memstats_mcache_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{instance}}: mcache (avg)", - "refId": "D" - }, - { - "expr": "avg by (job)(go_memstats_buck_hash_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{instance}}: buck hash (avg)", - "refId": "E" - }, - { - "expr": "avg by (job)(go_memstats_gc_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: gc (avg)", - "refId": "F" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Other Memory Reservations", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average memory reserved, and actually in use, by the heap, across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 8 - }, - "hiddenSeries": false, - "id": 12, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - 
"spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(go_memstats_heap_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: heap reserved (avg)", - "refId": "B" - }, - { - "expr": "avg by (job)(go_memstats_heap_inuse_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: heap in use (avg)", - "refId": "A" - }, - { - "expr": "avg by (job)(go_memstats_heap_alloc_bytes{job=~\"tns_app\",instance=~\".*\"})", - "interval": "", - "legendFormat": "{{job}}: heap alloc (avg)", - "refId": "C" - }, - { - "expr": "avg by (job)(go_memstats_heap_idle_bytes{job=~\"tns_app\",instance=~\".*\"})", - "interval": "", - "legendFormat": "{{job}}: heap idle (avg)", - "refId": "D" - }, - { - "expr": "avg by (job)(go_memstats_heap_released_bytes{job=~\"tns_app\",instance=~\".*\"})", - "interval": "", - "legendFormat": "{{job}}: heap released (avg)", - "refId": "E" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Heap Memory", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average allocation rate in bytes per second, across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 16 - }, - 
"hiddenSeries": false, - "id": 14, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 1, - "points": true, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(rate(go_memstats_alloc_bytes_total{job=\"$job\", instance=~\"$instance\"}[$__rate_interval]))", - "interval": "", - "legendFormat": "{{job}}: bytes malloced/s (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Allocation Rate, Bytes", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average rate of heap object allocation, across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 16 - }, - "hiddenSeries": false, - "id": 20, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - 
"alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(go_memstats_mallocs_total{job=\"$job\", instance=~\"$instance\"}[$__rate_interval])", - "interval": "", - "legendFormat": "{{job}}: obj mallocs/s (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Heap Object Allocation Rate", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average number of live memory objects across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 24 - }, - "hiddenSeries": false, - "id": 22, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": 
[ - { - "expr": "avg by(job)(go_memstats_mallocs_total{job=\"$job\", instance=~\"$instance\"} - go_memstats_frees_total{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: object count (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Number of Live Objects", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average number of goroutines across instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 24 - }, - "hiddenSeries": false, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(go_goroutines{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: goroutine count (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - 
"timeShift": null, - "title": "Goroutines", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 32 - }, - "hiddenSeries": false, - "id": 4, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(go_gc_duration_seconds{quantile=\"0\", job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: min gc time (avg)", - "refId": "A" - }, - { - "expr": "avg by (job)(go_gc_duration_seconds{quantile=\"1\", job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: max gc time (avg)", - "refId": "B" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "GC min & max duration", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - 
"xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "The number used bytes at which the runtime plans to perform the next GC, averaged across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 32 - }, - "hiddenSeries": false, - "id": 27, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(go_memstats_next_gc_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}} next gc bytes (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Next GC, Bytes", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "s", - "label": null, - 
"logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "refresh": "30s", - "schemaVersion": 30, - "style": "dark", - "tags": [ - "go-runtime" - ], - "templating": { - "list": [ - { - "current": { - "selected": false, - "text": "MONITORING", - "value": "MONITORING" - }, - "description": null, - "error": null, - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "datasource", - "options": [ ], - "query": "prometheus", - "queryValue": "", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" - }, - { - "allValue": null, - "current": { - "selected": false, - "text": "pilot", - "value": "pilot" - }, - "datasource": "$datasource", - "definition": "label_values(go_info, job)", - "description": null, - "error": null, - "hide": 0, - "includeAll": false, - "label": "job", - "multi": false, - "name": "job", - "options": [ ], - "query": { - "query": "label_values(go_info, job)", - "refId": "MONITORING-job-Variable-Query" - }, - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": "", - "current": { - "selected": false, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "definition": "label_values(go_info{job=\"$job\"}, instance)", - "description": null, - "error": null, - "hide": 0, - "includeAll": true, - "label": "instance", - "multi": true, - "name": "instance", - "options": [ ], - "query": { - "query": "label_values(go_info{job=\"$job\"}, instance)", - "refId": "MONITORING-instance-Variable-Query" - }, - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-30m", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", 
- "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Go runtime metrics", - "uid": "T4sSTLBGzgp", - "version": 1 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Go - Runtime - labels: - grafana_dashboard: "1" - name: go-runtime.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - datasources.yaml: |- - apiVersion: 1 - - deleteDatasources: - - name: Metrics - uid: metrics - - name: Logs - uid: logs - - name: Traces - uid: traces - - name: Profiles - uid: profiles - - datasources: - # Mimir for metrics - - name: Metrics - type: prometheus - uid: metrics - access: proxy - url: http://nginx.gateway.svc.cluster.local:8080/prometheus - basicAuth: false - isDefault: true - version: 1 - editable: true - jsonData: - prometheusType: Mimir - exemplarTraceIdDestinations: - - name: traceID - datasourceUid: traces - - # Loki for logs - - name: Logs - type: loki - uid: logs - access: proxy - url: http://nginx.gateway.svc.cluster.local:3100 - basicAuth: false - isDefault: false - version: 1 - editable: true - jsonData: - derivedFields: - - datasourceUid: traces - matcherRegex: "[tT]race_?[iI][dD]\"?[:=]\"?(\\w+)" - name: traceID - url: $${__value.raw} - - # Tempo for traces - - name: Traces - type: tempo - access: proxy - uid: traces - url: http://nginx.gateway.svc.cluster.local:3200 - basicAuth: false - isDefault: false - version: 1 - editable: true - apiVersion: 1 - jsonData: - search: - hide: false - lokiSearch: - datasourceUid: logs - nodeGraph: - enabled: true - serviceMap: - datasourceUid: metrics - traceQuery: - timeShiftEnabled: true - spanStartTimeShift: '-30m' - spanEndTimeShift: '30m' - spanBar: - type: 'Tag' - tag: 'http.path' - tracesToMetrics: - datasourceUid: metrics - spanStartTimeShift: '-30m' - spanEndTimeShift: '30m' - tags: [{ key: 'service.name', value: 'service' }] - queries: - - name: '(R) 
Rate' - query: 'sum(rate(traces_spanmetrics_calls_total{$$__tags}[$$__rate_interval]))' - - name: '(E) Error Rate' - query: 'sum(rate(traces_spanmetrics_calls_total{$$__tags, status_code="STATUS_CODE_ERROR"}[$$__rate_interval]))' - - name: '(D) Duration' - query: 'histogram_quantile(0.9, sum(rate(traces_spanmetrics_latency_bucket{$$__tags}[$$__rate_interval])) by (le))' - tracesToLogsV2: - datasourceUid: logs - spanStartTimeShift: '-30m' - spanEndTimeShift: '30m' - tags: [{ key: 'app', value: 'app' }] - filterByTraceID: false - filterBySpanID: false - tracesToProfiles: - customQuery: false - datasourceUid: "profiles" - profileTypeId: "process_cpu:cpu:nanoseconds:cpu:nanoseconds" - tags: [{ key: 'app', value: 'service_name' }] - - # Pyroscope for profiles - - name: Profiles - type: grafana-pyroscope-datasource - uid: profiles - access: proxy - url: http://nginx.gateway.svc.cluster.local:4040 - basicAuth: false - isDefault: false - version: 1 - editable: true -kind: ConfigMap -metadata: - labels: - grafana_datasource: "1" - name: grafana-datasources-757dh59h8m - namespace: monitoring-system ---- -apiVersion: v1 -data: - loki-chunks.json: |- - { - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "loki" - ], - "targetBlank": false, - "title": "Loki Dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 
5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(loki_ingester_memory_chunks{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"})", - "format": "time_series", - "legendFormat": "series", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Series", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(loki_ingester_memory_chunks{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}) / sum(loki_ingester_memory_streams{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"})", - "format": "time_series", - "legendFormat": "chunks", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Chunks per series", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": 
[ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Active Series / Chunks", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(loki_ingester_chunk_utilization_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) * 1 / sum(rate(loki_ingester_chunk_utilization_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Utilization", - "tooltip": { - 
"shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_age_seconds_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_age_seconds_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(loki_ingester_chunk_age_seconds_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) * 1e3 / sum(rate(loki_ingester_chunk_age_seconds_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Age", - "tooltip": { - "shared": true, - "sort": 2, - 
"value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Flush Stats", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_entries_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_entries_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(loki_ingester_chunk_entries_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) * 1 / sum(rate(loki_ingester_chunk_entries_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - 
], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Log Entries Per Chunk", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(loki_chunk_store_index_entries_per_chunk_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m])) / sum(rate(loki_chunk_store_index_entries_per_chunk_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m]))", - "format": "time_series", - "legendFormat": "Index Entries", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Index Entries Per Chunk", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": 
null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Flush Stats", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "loki_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"} or cortex_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Queue Length", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { - "1xx": "#EAB839", - "2xx": "#7EB26D", - "3xx": "#6ED0E0", - "4xx": "#EF843C", - "5xx": "#E24D42", - "OK": "#7EB26D", - "cancel": "#A9A9A9", - "error": "#E24D42", - "success": "#7EB26D" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": 
false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_ingester_chunk_age_seconds_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Flush Rate", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Flush Stats", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - 
"expr": "sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Chunks Flushed/Second", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (reason) (rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) / ignoring(reason) group_left sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{reason}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Chunk Flush Reason", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - 
"yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": 1, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": 1, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Flush Stats", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "$datasource", - "heatmap": { }, - "hideZeroBuckets": false, - "highlightCards": true, - "id": 11, - "legend": { - "show": true - }, - "span": 12, - "targets": [ - { - "expr": "sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]))", - "format": "heatmap", - "intervalFactor": 2, - "legendFormat": "{{le}}", - "refId": "A" - } - ], - "title": "Chunk Utilization", - "tooltip": { - "show": true, - "showHistogram": true - }, - "type": "heatmap", - "xAxis": { - "show": true - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 0, - "format": "percentunit", - "show": true, - "splitFactor": null - }, - "yBucketBound": "auto" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Utilization", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "$datasource", - "heatmap": { }, - "hideZeroBuckets": false, - "highlightCards": true, - "id": 12, - "legend": { - 
"show": true - }, - "span": 12, - "targets": [ - { - "expr": "sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le)", - "format": "heatmap", - "intervalFactor": 2, - "legendFormat": "{{le}}", - "refId": "A" - } - ], - "title": "Chunk Size Bytes", - "tooltip": { - "show": true, - "showHistogram": true - }, - "type": "heatmap", - "xAxis": { - "show": true - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 0, - "format": "bytes", - "show": true, - "splitFactor": null - }, - "yBucketBound": "auto" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Utilization", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 13, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[1m])) by (le))", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": "histogram_quantile(0.90, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[1m])) by (le))", - "format": "time_series", - "legendFormat": "p90", - "legendLink": null - }, - { - "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[1m])) by 
(le))", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Chunk Size Quantiles", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Utilization", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 14, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.5, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m])) by (le))", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - }, - { - "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m])) by (le))", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": "sum(rate(loki_ingester_chunk_bounds_hours_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m])) / 
sum(rate(loki_ingester_chunk_bounds_hours_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m]))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Chunk Duration hours (end-start)", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Duration", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "loki" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(loki_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 2, - 
"tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Loki / Chunks", - "uid": "chunks", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Loki Mixin - labels: - grafana_dashboard: "1" - name: loki-chunks.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - loki-deletion.json: |- - { - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "loki" - ], - "targetBlank": false, - "title": "Loki Dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "100px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "none", - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(loki_compactor_pending_delete_requests_count{cluster=~\"$cluster\", namespace=~\"$namespace\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - 
"title": "Number of Pending Requests", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "dtdurations", - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max(loki_compactor_oldest_pending_delete_request_age_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Oldest Pending Request Age", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": 
"250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "(loki_compactor_delete_requests_received_total{cluster=~\"$cluster\", namespace=~\"$namespace\"} or on() vector(0)) - on () (loki_compactor_delete_requests_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\"} or on () vector(0))", - "format": "time_series", - "legendFormat": "in progress", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "# of Delete Requests (received - processed) ", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, 
- "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(increase(loki_compactor_delete_requests_received_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[1d]))", - "format": "time_series", - "legendFormat": "received", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Delete Requests Received / Day", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(increase(loki_compactor_delete_requests_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[1d]))", - "format": "time_series", - "legendFormat": "processed", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Delete Requests Processed / Day", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, 
- "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Churn", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"}", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Compactor CPU usage", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - 
"linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} / 1024 / 1024 ", - "format": "time_series", - "legendFormat": " {{pod}} ", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Compactor memory usage (MiB)", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "loki_boltdb_shipper_compact_tables_operation_duration_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"}", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Compaction run duration (seconds)", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - 
}, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Compactor", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(increase(loki_compactor_load_pending_requests_attempts_total{status=\"fail\", cluster=~\"$cluster\", namespace=~\"$namespace\"}[1h]))", - "format": "time_series", - "legendFormat": "failures", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Failures in Loading Delete Requests / Hour", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": 
false, - "datasource": "$datasource", - "fill": 1, - "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(loki_compactor_deleted_lines{cluster=~\"$cluster\",job=~\"$namespace/compactor\"}[$__rate_interval])) by (user)", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Lines Deleted / Sec", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Deletion metrics", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$loki_datasource", - "id": 11, - "span": 6, - "targets": [ - { - "expr": "{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} |~ \"Started processing delete request|delete request for user marked as processed\" | logfmt | line_format \"{{.ts}} user={{.user}} delete_request_id={{.delete_request_id}} msg={{.msg}}\" ", - "refId": "A" - } - ], - "title": "In progress/finished", - "type": "logs" - }, - { - "datasource": "$loki_datasource", - "id": 12, - "span": 6, - "targets": [ - { - "expr": 
"{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} |~ \"delete request for user added\" | logfmt | line_format \"{{.ts}} user={{.user}} query='{{.query}}'\"", - "refId": "A" - } - ], - "title": "Requests", - "type": "logs" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "List of deletion requests", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "loki" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(loki_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Loki / Deletion", - "uid": "deletion", - 
"version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Loki Mixin - labels: - grafana_dashboard: "1" - name: loki-deletion.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - loki-logs.json: |- - { - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "iteration": 1583185057230, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "loki" - ], - "targetBlank": false, - "title": "Loki Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 4, - "w": 3, - "x": 0, - "y": 0 - }, - "hiddenSeries": false, - "id": 35, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(go_goroutines{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"})", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "goroutines", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": 
true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 4, - "w": 3, - "x": 3, - "y": 0 - }, - "hiddenSeries": false, - "id": 41, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(go_gc_duration_seconds{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}) by (quantile)", - "legendFormat": "{{quantile}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "gc duration", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 4, - "w": 3, - "x": 6, - "y": 0 - }, - "hiddenSeries": false, - "id": 36, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": 
"null", - "options": { - "dataLinks": [ ] - }, - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(container_cpu_usage_seconds_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"}[5m]))", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "cpu", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 4, - "w": 3, - "x": 9, - "y": 0 - }, - "hiddenSeries": false, - "id": 40, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"})", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - 
"title": "working set", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 4, - "w": 3, - "x": 12, - "y": 0 - }, - "hiddenSeries": false, - "id": 38, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}[5m]))", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "tx", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - 
"aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 4, - "w": 3, - "x": 15, - "y": 0 - }, - "hiddenSeries": false, - "id": 39, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}[5m]))", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "rx", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 4, - "w": 3, - "x": 18, - "y": 0 - }, - "hiddenSeries": false, - "id": 37, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "percentage": false, - "pointradius": 2, - "points": false, 
- "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "increase(kube_pod_container_status_last_terminated_reason{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"}[30m]) > 0", - "legendFormat": "{{reason}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "restarts", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 4, - "w": 3, - "x": 21, - "y": 0 - }, - "hiddenSeries": false, - "id": 42, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(promtail_custom_bad_words_total{cluster=\"$cluster\", exported_namespace=\"$namespace\", exported_pod=~\"$deployment.*\", exported_pod=~\"$pod\", container=~\"$container\"}[5m])) by (level)", - "legendFormat": "{{level}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - 
"timeShift": null, - "title": "bad words", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$loki_datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 4 - }, - "hiddenSeries": false, - "id": 31, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "warn", - "color": "#FF780A" - }, - { - "alias": "error", - "color": "#E02F44" - }, - { - "alias": "info", - "color": "#56A64B" - }, - { - "alias": "debug", - "color": "#3274D9" - } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\" } |logfmt| level=\"$level\" |= \"$filter\" [5m])) by (level)", - "intervalFactor": 3, - "legendFormat": "{{level}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Log Rate", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": false, - 
"values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "datasource": "$loki_datasource", - "gridPos": { - "h": 19, - "w": 24, - "x": 0, - "y": 6 - }, - "id": 29, - "maxDataPoints": "", - "options": { - "showLabels": false, - "showTime": true, - "sortOrder": "Descending", - "wrapLogMessage": true - }, - "targets": [ - { - "expr": "{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"} | logfmt | level=\"$level\" |= \"$filter\"", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Logs", - "type": "logs" - } - ], - "refresh": "10s", - "rows": [ ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "loki" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(loki_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - 
"sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "hide": 0, - "label": null, - "name": "loki_datasource", - "options": [ ], - "query": "loki", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "deployment", - "options": [ ], - "query": "label_values(kube_deployment_created{cluster=\"$cluster\", namespace=\"$namespace\"}, deployment)", - "refresh": 0, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "pod", - "options": [ ], - "query": "label_values(kube_pod_container_info{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\"}, pod)", - "refresh": 0, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "container", - "options": [ ], - "query": "label_values(kube_pod_container_info{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\", pod=~\"$deployment.*\"}, container)", - "refresh": 0, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "selected": true, - "text": "", - "value": "" - }, - "hide": 0, - "includeAll": false, - "label": "", - "multi": true, - "name": "level", - "options": [ - { - "selected": false, - "text": "debug", - "value": "debug" - }, - { - "selected": false, - "text": "info", - "value": "info" - }, - { - "selected": false, 
- "text": "warn", - "value": "warn" - }, - { - "selected": false, - "text": "error", - "value": "error" - } - ], - "query": "debug,info,warn,error", - "refresh": 0, - "type": "custom" - }, - { - "current": { - "selected": false, - "text": "", - "value": "" - }, - "label": "LogQL Filter", - "name": "filter", - "query": "", - "type": "textbox" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Loki / Logs", - "uid": "logs", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Loki Mixin - labels: - grafana_dashboard: "1" - name: loki-logs.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - loki-mixin-recording-rules.json: |- - { - "annotations": { - "list": [ ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "iteration": 1635347545534, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "loki" - ], - "targetBlank": false, - "title": "Loki Dashboards", - "type": "dashboards" - } - ], - "liveNow": false, - "panels": [ - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ ], - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 1 - } - ] - } - }, - "overrides": [ ] - }, - "gridPos": { - "h": 10, - "w": 2, - "x": 0, - "y": 0 - }, - "id": 2, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - 
"values": false - }, - "textMode": "auto" - }, - "pluginVersion": "8.3.0-38205pre", - "targets": [ - { - "datasource": "${datasource}", - "exemplar": false, - "expr": "sum(loki_ruler_wal_appender_ready) by (pod, tenant) == 0", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Appenders Not Ready", - "type": "stat" - }, - { - "datasource": "${datasource}", - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [ ] - }, - "gridPos": { - "h": 10, - "w": 11, - "x": 2, - "y": 0 - }, - "id": 4, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "targets": [ - { - "datasource": "${datasource}", - "exemplar": true, - "expr": "sum(rate(loki_ruler_wal_samples_appended_total{tenant=~\"${tenant}\"}[$__rate_interval])) by (tenant) > 0", - "interval": "", - "legendFormat": "{{tenant}}", - "refId": "A" - } - ], - "title": "Samples Appended to WAL per Second", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Series are unique combinations of labels", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - 
"barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [ ] - }, - "gridPos": { - "h": 10, - "w": 11, - "x": 13, - "y": 0 - }, - "id": 5, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "targets": [ - { - "datasource": "${datasource}", - "exemplar": true, - "expr": "sum(rate(loki_ruler_wal_storage_created_series_total{tenant=~\"${tenant}\"}[$__rate_interval])) by (tenant) > 0", - "interval": "", - "legendFormat": "{{tenant}}", - "refId": "A" - } - ], - "title": "Series Created per Second", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Difference between highest timestamp appended to WAL and highest timestamp successfully written to remote storage", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - 
"color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [ ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 10 - }, - "id": 6, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "targets": [ - { - "datasource": "${datasource}", - "exemplar": true, - "expr": "loki_ruler_wal_prometheus_remote_storage_highest_timestamp_in_seconds{tenant=~\"${tenant}\"}\n- on (tenant)\n (\n loki_ruler_wal_prometheus_remote_storage_queue_highest_sent_timestamp_seconds{tenant=~\"${tenant}\"}\n or vector(0)\n )", - "interval": "", - "legendFormat": "{{tenant}}", - "refId": "A" - } - ], - "title": "Write Behind", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [ ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 10 - }, - "id": 7, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "targets": [ - { - "datasource": "${datasource}", - "exemplar": true, - "expr": 
"sum(rate(loki_ruler_wal_prometheus_remote_storage_samples_total{tenant=~\"${tenant}\"}[$__rate_interval])) by (tenant) > 0", - "interval": "", - "legendFormat": "{{tenant}}", - "refId": "A" - } - ], - "title": "Samples Sent per Second", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "\n", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 20 - }, - "id": 8, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "targets": [ - { - "datasource": "${datasource}", - "exemplar": true, - "expr": "sum by (tenant) (loki_ruler_wal_disk_size{tenant=~\"${tenant}\"})", - "interval": "", - "legendFormat": "{{tenant}}", - "refId": "A" - } - ], - "title": "WAL Disk Size", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Some number of pending samples is expected, but if remote-write is failing this value will remain high", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - 
"hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [ ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 20 - }, - "id": 9, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "targets": [ - { - "datasource": "${datasource}", - "exemplar": true, - "expr": "max(loki_ruler_wal_prometheus_remote_storage_samples_pending{tenant=~\"${tenant}\"}) by (tenant,pod) > 0", - "interval": "", - "legendFormat": "{{tenant}}", - "refId": "A" - } - ], - "title": "Pending Samples", - "type": "timeseries" - } - ], - "refresh": "10s", - "rows": [ ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "loki" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(loki_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - 
"includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "hide": 0, - "label": null, - "name": "loki_datasource", - "options": [ ], - "query": "loki", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": null, - "multi": false, - "name": "tenant", - "options": [ ], - "query": "query_result(sum by (id) (grafanacloud_logs_instance_info) and sum(label_replace(loki_tenant:active_streams{cluster=\"$cluster\",namespace=\"$namespace\"},\"id\",\"$1\",\"tenant\",\"(.*)\")) by(id))", - "refresh": 0, - "regex": "/\"([^\"]+)\"/", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Loki / Recording Rules", - "uid": "recording-rules", - "version": 0, - "weekStart": "" - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Loki Mixin - labels: - grafana_dashboard: "1" - name: loki-mixin-recording-rules.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - loki-operational.json: |- - { - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "iteration": 1588704280892, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "loki" 
- ], - "targetBlank": false, - "title": "Loki Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 17, - "panels": [ ], - "targets": [ ], - "title": "Main", - "type": "row" - }, - { - "aliasColors": { - "5xx": "red" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { } - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 5, - "w": 4, - "x": 0, - "y": 1 - }, - "hiddenSeries": false, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster=\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\"}[5m]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", \"([a-z]+)\")\n)", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Queries/Second", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 10, - "max": null, 
- "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "5xx": "red" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { } - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 5, - "w": 4, - "x": 4, - "y": 1 - }, - "hiddenSeries": false, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster=\"$cluster\", job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push\"}[5m]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Pushes/Second", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 10, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - 
"aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { } - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 5, - "w": 4, - "x": 12, - "y": 1 - }, - "hiddenSeries": false, - "id": 2, - "interval": "", - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "topk(10, sum(rate(loki_distributor_lines_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (tenant))", - "legendFormat": "{{tenant}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Lines Per Tenant (top 10)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { } - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 5, - "w": 4, - "x": 16, - "y": 1 - }, - "hiddenSeries": false, - "id": 4, - "legend": { - "avg": false, - "current": false, - "hideEmpty": true, - 
"hideZero": true, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "topk(10, sum(rate(loki_distributor_bytes_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (tenant)) / 1024 / 1024", - "legendFormat": "{{tenant}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "MBs Per Tenant (Top 10)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { } - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 5, - "w": 4, - "x": 20, - "y": 1 - }, - "hiddenSeries": false, - "id": 24, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": 
false, - "targets": [ - { - "expr": "increase(kube_pod_container_status_restarts_total{cluster=\"$cluster\", namespace=\"$namespace\"}[10m]) > 0", - "hide": false, - "interval": "", - "legendFormat": "{{container}}-{{pod}}", - "refId": "B" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Container Restarts", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { } - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 6 - }, - "hiddenSeries": false, - "id": 9, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3", - "legendFormat": ".99", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.75, sum 
by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3", - "legendFormat": ".9", - "refId": "B" - }, - { - "expr": "histogram_quantile(0.5, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3", - "legendFormat": ".5", - "refId": "C" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Push Latency", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { } - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 5, - "w": 6, - "x": 12, - "y": 6 - }, - "hiddenSeries": false, - "id": 12, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) 
(cluster_job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", cluster=~\"$cluster\"})) * 1e3", - "legendFormat": ".99", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.9, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", cluster=~\"$cluster\"})) * 1e3", - "legendFormat": ".9", - "refId": "B" - }, - { - "expr": "histogram_quantile(0.5, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", cluster=~\"$cluster\"})) * 1e3", - "legendFormat": ".5", - "refId": "C" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Distributor Latency", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { } - }, - "overrides": [ ] - }, - "fill": 0, - "fillGradient": 0, - "gridPos": { - "h": 5, - "w": 6, - "x": 18, - "y": 6 - }, - "hiddenSeries": false, - "id": 71, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - 
"stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/distributor\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/distributor\"}[$__rate_interval])) by (route) > 0", - "interval": "", - "legendFormat": "{{route}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Distributor Success Rate", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "decimals": null, - "format": "percentunit", - "label": "", - "logBase": 1, - "max": "1", - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { } - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 5, - "w": 6, - "x": 12, - "y": 11 - }, - "hiddenSeries": false, - "id": 13, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": 
"histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester.*\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3", - "legendFormat": ".99", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.9, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester.*\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3", - "hide": false, - "legendFormat": ".9", - "refId": "B" - }, - { - "expr": "histogram_quantile(0.5, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester.*\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3", - "hide": false, - "legendFormat": ".5", - "refId": "C" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Ingester Latency Write", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { } - }, - "overrides": [ ] - }, - "fill": 0, - "fillGradient": 0, - "gridPos": { - "h": 5, - "w": 6, - "x": 18, - "y": 11 - }, - "hiddenSeries": false, - "id": 72, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", 
- "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/ingester.*\", status_code!~\"5[0-9]{2}\", route=\"/logproto.Pusher/Push\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/ingester.*\", route=\"/logproto.Pusher/Push\"}[$__rate_interval])) by (route) > 0", - "interval": "", - "legendFormat": "{{route}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Ingester Success Rate Write", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "decimals": null, - "format": "percentunit", - "label": "", - "logBase": 1, - "max": "1", - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { } - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 16 - }, - "hiddenSeries": false, - "id": 10, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - 
"nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/querier\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))", - "legendFormat": "{{route}}-.99", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.9, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/querier\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))", - "legendFormat": "{{route}}-.9", - "refId": "B" - }, - { - "expr": "histogram_quantile(0.5, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/querier\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))", - "legendFormat": "{{route}}-.5", - "refId": "C" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Query Latency", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - 
} - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { } - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 5, - "w": 6, - "x": 12, - "y": 16 - }, - "hiddenSeries": false, - "id": 14, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/querier\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3", - "legendFormat": ".99-{{route}}", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.9, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/querier\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3", - "legendFormat": ".9-{{route}}", - "refId": "B" - }, - { - "expr": "histogram_quantile(0.5, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/querier\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", 
cluster=\"$cluster\"})) * 1e3", - "legendFormat": ".5-{{route}}", - "refId": "C" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Querier Latency", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { } - }, - "overrides": [ ] - }, - "fill": 0, - "fillGradient": 0, - "gridPos": { - "h": 5, - "w": 6, - "x": 18, - "y": 16 - }, - "hiddenSeries": false, - "id": 73, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/querier\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/querier\"}[$__rate_interval])) by (route) > 0", - "interval": "", - "legendFormat": "{{route}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - 
"timeRegions": [ ], - "timeShift": null, - "title": "Querier Success Rate", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "decimals": null, - "format": "percentunit", - "label": "", - "logBase": 1, - "max": "1", - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "", - "fieldConfig": { - "defaults": { - "custom": { } - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 5, - "w": 6, - "x": 12, - "y": 21 - }, - "hiddenSeries": false, - "id": 15, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3", - "legendFormat": ".99-{{route}}", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.9, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester.*\", 
route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3", - "legendFormat": ".9-{{route}}", - "refId": "B" - }, - { - "expr": "histogram_quantile(0.5, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3", - "legendFormat": ".5-{{route}}", - "refId": "C" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Ingester Latency Read", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { } - }, - "overrides": [ ] - }, - "fill": 0, - "fillGradient": 0, - "gridPos": { - "h": 5, - "w": 6, - "x": 18, - "y": 21 - }, - "hiddenSeries": false, - "id": 74, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 
10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/ingester.*\", status_code!~\"5[0-9]{2}\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/ingester.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval])) by (route) > 0", - "interval": "", - "legendFormat": "{{route}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Ingester Success Rate Read", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "decimals": null, - "format": "percentunit", - "label": "", - "logBase": 1, - "max": "1", - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 26 - }, - "id": 110, - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 27 - }, - "hiddenSeries": false, - "id": 112, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - 
"nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "topk(10,sum by (tenant, reason) (rate(loki_discarded_samples_total{cluster=\"$cluster\",namespace=\"$namespace\"}[1m])))", - "interval": "", - "legendFormat": "{{ tenant }} - {{ reason }}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Discarded Lines", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "columns": [ ], - "datasource": "$datasource", - "fontSize": "100%", - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 27 - }, - "id": 113, - "pageSize": null, - "panels": [ ], - "showHeader": true, - "sort": { - "col": 3, - "desc": true - }, - "styles": [ - { - "alias": "Time", - "align": "auto", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "tenant", - "thresholds": [ ], - "type": "string", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - 
"dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "reason", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "align": "right", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "number", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk(10, sum by (tenant, reason) (sum_over_time(increase(loki_discarded_samples_total{cluster=\"$cluster\",namespace=\"$namespace\"}[1m])[$__range:1m])))", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "{{ tenant }} - {{ reason }}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Discarded Lines Per Interval", - "transform": "table", - "type": "table-old" - } - ], - "targets": [ ], - "title": "Limits", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 27 - }, - "id": 23, - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 28 - }, - "hiddenSeries": false, - "id": 26, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": false, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 1, - "points": true, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"distributor.*\"}[$__rate_interval]))", - "intervalFactor": 3, - "legendFormat": "{{pod}}", - "refId": 
"A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "CPU Usage", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 28 - }, - "hiddenSeries": false, - "id": 27, - "legend": { - "avg": false, - "current": false, - "hideEmpty": false, - "hideZero": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": false, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 1, - "points": true, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"distributor.*\"}", - "instant": false, - "intervalFactor": 3, - "legendFormat": "{{pod}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Memory Usage", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": 
true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$loki_datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 4, - "w": 12, - "x": 12, - "y": 28 - }, - "hiddenSeries": false, - "id": 31, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": false, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "{}", - "color": "#C4162A" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/distributor\"} | logfmt | level=\"error\"[1m]))", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Error Log Rate", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": false, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "datasource": "$loki_datasource", - "gridPos": { - "h": 18, - "w": 12, - "x": 12, - "y": 32 - }, - "id": 29, - "options": { - "showLabels": false, - "showTime": false, - "sortOrder": "Descending", - "wrapLogMessage": true - }, - "panels": [ ], - "targets": [ - { - "expr": 
"{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/distributor\"} |= \"level=error\"", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Logs", - "type": "logs" - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 0, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 35 - }, - "hiddenSeries": false, - "id": 33, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/distributor\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/distributor\"}[$__rate_interval])) by (route) > 0", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{route}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Success Rate", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - 
"aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 35 - }, - "hiddenSeries": false, - "id": 32, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(loki_distributor_ingester_append_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (pod)", - "intervalFactor": 1, - "legendFormat": "{{pod}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Append Failures By Ingester", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 42 - }, - "hiddenSeries": false, - "id": 34, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": 
[ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(loki_distributor_bytes_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (pod)", - "intervalFactor": 1, - "legendFormat": "{{pod}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Bytes Received/Second", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 42 - }, - "hiddenSeries": false, - "id": 35, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(loki_distributor_lines_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (pod)", - "intervalFactor": 1, - "legendFormat": "{{pod}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - 
"timeShift": null, - "title": "Lines Received/Second", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "targets": [ ], - "title": "Distributor", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 28 - }, - "id": 19, - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 29 - }, - "hiddenSeries": false, - "id": 36, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": false, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 1, - "points": true, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"ingester.*\"}[$__rate_interval]))", - "intervalFactor": 3, - "legendFormat": "{{pod}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "CPU Usage", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, 
- "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 36 - }, - "hiddenSeries": false, - "id": 37, - "legend": { - "avg": false, - "current": false, - "hideEmpty": false, - "hideZero": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": false, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 1, - "points": true, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"ingester.*\"}", - "instant": false, - "intervalFactor": 3, - "legendFormat": "{{pod}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Memory Usage", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$loki_datasource", - "fill": 1, - "fillGradient": 
0, - "gridPos": { - "h": 3, - "w": 18, - "x": 12, - "y": 29 - }, - "hiddenSeries": false, - "id": 38, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": false, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "{}", - "color": "#F2495C" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/ingester.*\"} | logfmt | level=\"error\"[1m]))", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Error Log Rate", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": false, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "datasource": "$loki_datasource", - "gridPos": { - "h": 18, - "w": 18, - "x": 12, - "y": 32 - }, - "id": 39, - "options": { - "showLabels": false, - "showTime": false, - "sortOrder": "Descending", - "wrapLogMessage": true - }, - "panels": [ ], - "targets": [ - { - "expr": "{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/ingester.*\"} |= \"level=error\"", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Logs", - "type": "logs" - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 0, - 
"fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 41 - }, - "hiddenSeries": false, - "id": 67, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/ingester.*\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/ingester.*\"}[$__rate_interval])) by (route) > 0", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{route}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Success Rate", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "targets": [ ], - "title": "Ingester", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 29 - }, - "id": 104, - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": 
{ - "h": 8, - "w": 12, - "x": 0, - "y": 30 - }, - "hiddenSeries": false, - "id": 106, - "legend": { - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "topk(10,sum by (tenant) (loki_ingester_memory_streams{cluster=\"$cluster\",job=~\"($namespace)/ingester.*\"}))", - "interval": "", - "legendFormat": "{{ tenant }}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Active Streams", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 30 - }, - "hiddenSeries": false, - "id": 108, - "legend": { - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 2, - "points": false, 
- "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "topk(10, sum by (tenant) (rate(loki_ingester_streams_created_total{cluster=\"$cluster\",job=~\"($namespace)/ingester.*\"}[1m]) > 0))", - "interval": "", - "legendFormat": "{{ tenant }}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Streams Created/Sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "targets": [ ], - "title": "Streams", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 30 - }, - "id": 94, - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 31 - }, - "hiddenSeries": false, - "id": 102, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "De-Dupe Ratio", - "yaxis": 2 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": 
"sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\",job=~\"($namespace)/ingester.*\"}[1m]))", - "interval": "", - "legendFormat": "Chunks", - "refId": "A" - }, - { - "expr": "sum(increase(loki_chunk_store_deduped_chunks_total{cluster=\"$cluster\", job=~\"($namespace)/ingester.*\"}[1m]))/sum(increase(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"($namespace)/ingester.*\"}[1m])) < 1", - "interval": "", - "legendFormat": "De-Dupe Ratio", - "refId": "B" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Chunks Flushed/Sec", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "$datasource", - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 31 - }, - "heatmap": { }, - "hideZeroBuckets": false, - "highlightCards": true, - "id": 100, - "legend": { - "show": true - }, - "panels": [ ], - "reverseYBuckets": false, - "targets": [ - { - "expr": "sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\",job=~\"($namespace)/ingester.*\"}[1m])) by (le)", - "format": "heatmap", - "instant": false, - "interval": "", - "legendFormat": "{{ le }}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Chunk Size Bytes", - "tooltip": { - "show": true, - "showHistogram": 
false - }, - "type": "heatmap", - "xAxis": { - "show": true - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 0, - "format": "bytes", - "logBase": 1, - "max": null, - "min": null, - "show": true, - "splitFactor": null - }, - "yBucketBound": "auto", - "yBucketNumber": null, - "yBucketSize": null - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 7, - "fillGradient": 0, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 39 - }, - "hiddenSeries": false, - "id": 96, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(reason) (rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"$namespace/ingester\", namespace=~\"$namespace\"}[$__rate_interval])) / ignoring(reason) group_left sum(rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"$namespace/ingester\", namespace=~\"$namespace\"}[$__rate_interval]))", - "interval": "", - "legendFormat": "{{ reason }}" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Chunk Flush Reason %", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": "1", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": 
{ - "align": false, - "alignLevel": null - } - }, - { - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "$datasource", - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 39 - }, - "heatmap": { }, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 98, - "legend": { - "show": true - }, - "panels": [ ], - "reverseYBuckets": false, - "targets": [ - { - "expr": "sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"($namespace)/ingester.*\"}[1m]))", - "format": "heatmap", - "instant": false, - "interval": "", - "legendFormat": "{{ le }}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Chunk Utilization", - "tooltip": { - "show": true, - "showHistogram": false - }, - "type": "heatmap", - "xAxis": { - "show": true - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 0, - "format": "percentunit", - "logBase": 1, - "max": null, - "min": null, - "show": true, - "splitFactor": null - }, - "yBucketBound": "auto", - "yBucketNumber": null, - "yBucketSize": null - } - ], - "targets": [ ], - "title": "Chunks", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 31 - }, - "id": 64, - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 32 - }, - "hiddenSeries": false, - "id": 68, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": false, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": 
[ ], - "percentage": false, - "pointradius": 1, - "points": true, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"querier.*\"}[$__rate_interval]))", - "intervalFactor": 3, - "legendFormat": "{{pod}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "CPU Usage", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 39 - }, - "hiddenSeries": false, - "id": 69, - "legend": { - "avg": false, - "current": false, - "hideEmpty": false, - "hideZero": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": false, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 1, - "points": true, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"querier.*\"}", - "instant": false, - "intervalFactor": 3, - "legendFormat": "{{pod}}", - "refId": "A" - } - ], - "thresholds": [ ], 
- "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Memory Usage", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$loki_datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 3, - "w": 18, - "x": 12, - "y": 32 - }, - "hiddenSeries": false, - "id": 65, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": false, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "{}", - "color": "#F2495C" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/querier\"} | logfmt | level=\"error\"[1m]))", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Error Log Rate", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": false, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, 
- "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "datasource": "$loki_datasource", - "gridPos": { - "h": 18, - "w": 18, - "x": 12, - "y": 35 - }, - "id": 66, - "options": { - "showLabels": false, - "showTime": false, - "sortOrder": "Descending", - "wrapLogMessage": true - }, - "panels": [ ], - "targets": [ - { - "expr": "{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/querier\"} |= \"level=error\"", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Logs", - "type": "logs" - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 0, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 46 - }, - "hiddenSeries": false, - "id": 70, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/querier\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/querier\"}[$__rate_interval])) by (route) > 0", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{route}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Success Rate", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, 
- "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "targets": [ ], - "title": "Querier", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 32 - }, - "id": 52, - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 30 - }, - "hiddenSeries": false, - "id": 53, - "interval": "", - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(.99, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (method, name, le, container))", - "intervalFactor": 1, - "legendFormat": "{{container}}: .99-{{method}}-{{name}}", - "refId": "A" - }, - { - "expr": "histogram_quantile(.9, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (method, name, le, container))", - "hide": false, - "legendFormat": "{{container}}: .9-{{method}}-{{name}}", - "refId": "B" - }, - { - "expr": "histogram_quantile(.5, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", 
namespace=\"$namespace\"}[5m])) by (method, name, le, container))", - "hide": false, - "legendFormat": "{{container}}: .5-{{method}}-{{name}}", - "refId": "C" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Latency By Method", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 38 - }, - "hiddenSeries": false, - "id": 54, - "interval": "", - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(loki_memcache_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, method, name, container)", - "intervalFactor": 1, - "legendFormat": "{{container}}: {{status_code}}-{{method}}-{{name}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Status By Method", - "tooltip": { - "shared": true, - "sort": 2, - 
"value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "targets": [ ], - "title": "Memcached", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 33 - }, - "id": 57, - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 31 - }, - "hiddenSeries": false, - "id": 55, - "interval": "", - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(.99, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", - "intervalFactor": 1, - "legendFormat": ".99-{{operation}}", - "refId": "A" - }, - { - "expr": "histogram_quantile(.9, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", - "hide": false, - "legendFormat": ".9-{{operation}}", - "refId": "B" - }, - { - "expr": "histogram_quantile(.5, 
sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", - "hide": false, - "legendFormat": ".5-{{operation}}", - "refId": "C" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Latency By Operation", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 39 - }, - "hiddenSeries": false, - "id": 58, - "interval": "", - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, status_code, method)", - "intervalFactor": 1, - "legendFormat": "{{status_code}}-{{operation}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Status By Operation", - "tooltip": { - "shared": true, - "sort": 
2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "targets": [ ], - "title": "Consul", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 34 - }, - "id": 43, - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 9 - }, - "hiddenSeries": false, - "id": 41, - "interval": "", - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (operation, le))", - "intervalFactor": 1, - "legendFormat": ".9", - "refId": "A" - }, - { - "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (operation, le))", - "refId": "B" - }, - { - "expr": "histogram_quantile(.5, 
sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (operation, le))", - "refId": "C" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "MutateRows Latency", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 9 - }, - "hiddenSeries": false, - "id": 46, - "interval": "", - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (operation, le))", - "interval": "", - "intervalFactor": 1, - "legendFormat": "99%", - "refId": "A" - }, - { - "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", 
operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (operation, le))", - "interval": "", - "legendFormat": "90%", - "refId": "B" - }, - { - "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (operation, le))", - "interval": "", - "legendFormat": "50%", - "refId": "C" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "ReadRows Latency", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 12, - "y": 9 - }, - "hiddenSeries": false, - "id": 44, - "interval": "", - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (operation, le))", - 
"interval": "", - "intervalFactor": 1, - "legendFormat": "99%", - "refId": "A" - }, - { - "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (operation, le))", - "interval": "", - "legendFormat": "90%", - "refId": "B" - }, - { - "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (operation, le))", - "interval": "", - "legendFormat": "50%", - "refId": "C" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "GetTable Latency", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 9 - }, - "hiddenSeries": false, - "id": 45, - "interval": "", - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, 
- "targets": [ - { - "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (operation, le))", - "intervalFactor": 1, - "legendFormat": ".9", - "refId": "A" - }, - { - "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (operation, le))", - "refId": "B" - }, - { - "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (operation, le))", - "refId": "C" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "ListTables Latency", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 16 - }, - "hiddenSeries": false, - "id": 47, - "interval": "", - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - 
"percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (status_code)", - "intervalFactor": 1, - "legendFormat": "{{status_code}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "MutateRows Status", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 16 - }, - "hiddenSeries": false, - "id": 50, - "interval": "", - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (status_code)", - 
"intervalFactor": 1, - "legendFormat": "{{status_code}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "ReadRows Status", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 12, - "y": 16 - }, - "hiddenSeries": false, - "id": 48, - "interval": "", - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (status_code)", - "intervalFactor": 1, - "legendFormat": "{{status_code}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "GetTable Status", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": 
true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 16 - }, - "hiddenSeries": false, - "id": 49, - "interval": "", - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (status_code)", - "intervalFactor": 1, - "legendFormat": "{{status_code}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "ListTables Status", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "targets": [ ], - "title": "Big Table", - "type": 
"row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 35 - }, - "id": 60, - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 33 - }, - "hiddenSeries": false, - "id": 61, - "interval": "", - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(.99, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", - "intervalFactor": 1, - "legendFormat": ".99-{{operation}}", - "refId": "A" - }, - { - "expr": "histogram_quantile(.9, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", - "hide": false, - "legendFormat": ".9-{{operation}}", - "refId": "B" - }, - { - "expr": "histogram_quantile(.5, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", - "hide": false, - "legendFormat": ".5-{{operation}}", - "refId": "C" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Latency By Operation", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - 
"label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 41 - }, - "hiddenSeries": false, - "id": 62, - "interval": "", - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(loki_gcs_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", - "intervalFactor": 1, - "legendFormat": "{{status_code}}-{{operation}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Status By Method", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "targets": [ ], - "title": "GCS", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 
1, - "w": 24, - "x": 0, - "y": 36 - }, - "id": 76, - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 6, - "w": 6, - "x": 0, - "y": 9 - }, - "id": 82, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(loki_dynamo_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Failure Rate", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 6, - "w": 6, - "x": 6, - "y": 9 - }, - "id": 83, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 2, - "points": false, - 
"renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(loki_dynamo_consumed_capacity_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Consumed Capacity Rate", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 6, - "w": 6, - "x": 12, - "y": 9 - }, - "id": 84, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(loki_dynamo_throttled_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Throttled Rate", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - 
"yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 6, - "w": 6, - "x": 18, - "y": 9 - }, - "id": 85, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(loki_dynamo_dropped_requests_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Dropped Rate", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 6, - "w": 6, - "x": 0, - "y": 15 - }, - "id": 86, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": 
true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", - "legendFormat": ".99", - "refId": "A" - }, - { - "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", - "legendFormat": ".9", - "refId": "B" - }, - { - "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", - "legendFormat": ".5", - "refId": "C" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Query Pages", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 6, - "w": 9, - "x": 6, - "y": 15 - }, - "id": 87, - "interval": "", - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": 
{ - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", - "intervalFactor": 1, - "legendFormat": ".99-{{operation}}", - "refId": "A" - }, - { - "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", - "hide": false, - "legendFormat": ".9-{{operation}}", - "refId": "B" - }, - { - "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", - "hide": false, - "legendFormat": ".5-{{operation}}", - "refId": "C" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Latency By Operation", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 6, - "w": 9, - "x": 15, - "y": 15 - }, - "id": 88, - "interval": "", - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": 
false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(loki_dynamo_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", - "intervalFactor": 1, - "legendFormat": "{{status_code}}-{{operation}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Status By Method", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "targets": [ ], - "title": "Dynamo", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 37 - }, - "id": 78, - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 10 - }, - "id": 79, - "interval": "", - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 1, - "points": false, - "renderer": 
"flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(.99, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", - "intervalFactor": 1, - "legendFormat": ".99-{{operation}}", - "refId": "A" - }, - { - "expr": "histogram_quantile(.9, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", - "hide": false, - "legendFormat": ".9-{{operation}}", - "refId": "B" - }, - { - "expr": "histogram_quantile(.5, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", - "hide": false, - "legendFormat": ".5-{{operation}}", - "refId": "C" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Latency By Operation", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 18 - }, - "id": 80, - "interval": "", - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - 
"percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(loki_s3_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", - "intervalFactor": 1, - "legendFormat": "{{status_code}}-{{operation}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Status By Method", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "targets": [ ], - "title": "S3", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 37 - }, - "id": 78, - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 10 - }, - "id": 79, - "interval": "", - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": 
"histogram_quantile(.99, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", - "intervalFactor": 1, - "legendFormat": ".99-{{operation}}", - "refId": "A" - }, - { - "expr": "histogram_quantile(.9, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", - "hide": false, - "legendFormat": ".9-{{operation}}", - "refId": "B" - }, - { - "expr": "histogram_quantile(.5, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", - "hide": false, - "legendFormat": ".5-{{operation}}", - "refId": "C" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Latency By Operation", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 18 - }, - "id": 80, - "interval": "", - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - 
"seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(loki_azure_blob_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", - "intervalFactor": 1, - "legendFormat": "{{status_code}}-{{operation}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Status By Method", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "targets": [ ], - "title": "Azure Blob", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 37 - }, - "id": 114, - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 10 - }, - "id": 115, - "interval": "", - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(.99, 
sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", - "intervalFactor": 1, - "legendFormat": ".99-{{operation}}", - "refId": "A" - }, - { - "expr": "histogram_quantile(.9, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", - "hide": false, - "legendFormat": ".9-{{operation}}", - "refId": "B" - }, - { - "expr": "histogram_quantile(.5, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", - "hide": false, - "legendFormat": ".5-{{operation}}", - "refId": "C" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Latency By Operation", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 18 - }, - "id": 116, - "interval": "", - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - 
"spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", - "intervalFactor": 1, - "legendFormat": "{{status_code}}-{{operation}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Status By Method", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "targets": [ ], - "title": "BoltDB Shipper", - "type": "row" - } - ], - "refresh": "10s", - "rows": [ ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "loki" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "hide": 0, - "label": null, - "name": "loki_datasource", - "options": [ ], - "query": "loki", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(loki_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - 
"current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Loki / Operational", - "uid": "operational", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Loki Mixin - labels: - grafana_dashboard: "1" - name: loki-operational.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - loki-reads-resources.json: |- - { - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "loki" - ], - "targetBlank": false, - "title": "Loki Dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": 
"request", - "color": "#FFC000", - "fill": 0 - }, - { - "alias": "limit", - "color": "#E02F44", - "fill": 0 - } - ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\", resource=\"cpu\"} > 0)", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "CPU", - "tooltip": { - "sort": 2 - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "request", - "color": 
"#FFC000", - "fill": 0 - }, - { - "alias": "limit", - "color": "#E02F44", - "fill": 0 - } - ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\", resource=\"memory\"} > 0)", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Memory (workingset)", - "tooltip": { - "sort": 2 - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(pod) 
(go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Memory (go heap inuse)", - "tooltip": { - "sort": 2 - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query Frontend", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "request", - "color": "#FFC000", - "fill": 0 - }, - { - "alias": "limit", - "color": "#E02F44", - "fill": 0 - } - ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\", resource=\"cpu\"} > 0)", - "format": 
"time_series", - "legendFormat": "request", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "CPU", - "tooltip": { - "sort": 2 - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "request", - "color": "#FFC000", - "fill": 0 - }, - { - "alias": "limit", - "color": "#E02F44", - "fill": 0 - } - ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\", resource=\"memory\"} > 0)", - "format": "time_series", - "legendFormat": 
"request", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Memory (workingset)", - "tooltip": { - "sort": 2 - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/query-scheduler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Memory (go heap inuse)", - "tooltip": { - "sort": 2 - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - 
] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query Scheduler", - "titleSize": "h6" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { }, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "request", - "color": "#FFC000", - "fill": 0 - }, - { - "alias": "limit", - "color": "#E02F44", - "fill": 0 - } - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\", resource=\"cpu\"} > 0)", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "CPU", - "tooltip": { - "sort": 2 - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": 
null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { }, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "request", - "color": "#FFC000", - "fill": 0 - }, - { - "alias": "limit", - "color": "#E02F44", - "fill": 0 - } - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\", resource=\"memory\"} > 0)", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Memory (workingset)", - "tooltip": { - "sort": 2 - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 
1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { }, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/querier\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Memory (go heap inuse)", - "tooltip": { - "sort": 2 - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "gridPos": { }, - "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(instance, pod, device) 
(rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"querier\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Disk Writes", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "gridPos": { }, - "id": 11, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"querier\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "thresholds": [ 
], - "timeFrom": null, - "timeShift": null, - "title": "Disk Reads", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { }, - "id": 12, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"querier.*\"})", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Disk Space Utilization", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": 
"short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { }, - "id": 13, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "request", - "color": "#FFC000", - "fill": 0 - }, - { - "alias": "limit", - "color": "#E02F44", - "fill": 0 - } - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\", resource=\"cpu\"} > 0)", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "CPU", - "tooltip": { - "sort": 2 - }, - "type": "graph", - "xaxis": { - "buckets": 
null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { }, - "id": 14, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "request", - "color": "#FFC000", - "fill": 0 - }, - { - "alias": "limit", - "color": "#E02F44", - "fill": 0 - } - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\", resource=\"memory\"} > 0)", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Memory (workingset)", - "tooltip": { - "sort": 2 - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": 
"bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { }, - "id": 15, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/index-gateway\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Memory (go heap inuse)", - "tooltip": { - "sort": 2 - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "gridPos": { }, - "id": 16, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - 
"span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Disk Writes", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "gridPos": { }, - "id": 17, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 
0)\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Disk Reads", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { }, - "id": 18, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"index-gateway.*\"})", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Disk Space Utilization", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": 
[ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Index Gateway", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 19, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "request", - "color": "#FFC000", - "fill": 0 - }, - { - "alias": "limit", - "color": "#E02F44", - "fill": 0 - } - ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", resource=\"cpu\"} > 0)", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - 
"title": "CPU", - "tooltip": { - "sort": 2 - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 20, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "request", - "color": "#FFC000", - "fill": 0 - }, - { - "alias": "limit", - "color": "#E02F44", - "fill": 0 - } - ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", resource=\"memory\"} > 0)", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Memory (workingset)", - "tooltip": { - "sort": 2 - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - 
"show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 21, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/ingester.+\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Memory (go heap inuse)", - "tooltip": { - "sort": 2 - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { }, - "id": 22, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, 
- "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(pod) (loki_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"}) or sum by(pod) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Rules", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { }, - "id": 23, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "request", - "color": "#FFC000", - "fill": 0 - }, - { - "alias": "limit", - "color": "#E02F44", - "fill": 0 - } - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", 
container=~\"ruler\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\", resource=\"cpu\"} > 0)", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "CPU", - "tooltip": { - "sort": 2 - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { }, - "id": 24, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "request", - "color": "#FFC000", - "fill": 0 - }, - { - "alias": "limit", - "color": "#E02F44", - "fill": 0 - } - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"})", - "format": 
"time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\", resource=\"memory\"} > 0)", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Memory (workingset)", - "tooltip": { - "sort": 2 - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { }, - "id": 25, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Memory (go heap inuse)", - "tooltip": { - "sort": 2 - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": 
null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ruler", - "titleSize": "h6", - "type": "row" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "loki" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(loki_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Loki / Reads Resources", - 
"uid": "reads-resources", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Loki Mixin - labels: - grafana_dashboard: "1" - name: loki-reads-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - loki-reads.json: |- - { - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "loki" - ], - "targetBlank": false, - "title": "Loki Dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - "1xx": "#EAB839", - "2xx": "#7EB26D", - "3xx": "#6ED0E0", - "4xx": "#EF843C", - "5xx": "#E24D42", - "OK": "#7EB26D", - "cancel": "#A9A9A9", - "error": "#E24D42", - "success": "#7EB26D" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": 
"time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "QPS", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3", - "format": "time_series", - "legendFormat": "{{ route }} 99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", 
route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3", - "format": "time_series", - "legendFormat": "{{ route }} 50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) ", - "format": "time_series", - "legendFormat": "{{ route }} Average", - "refId": "C" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Latency", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - 
"pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval])) by (le,pod)) * 1e3", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "__auto", - "refId": "A", - "step": 10 - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Per Pod Latency (p99)", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Frontend (query-frontend)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - "1xx": "#EAB839", - "2xx": "#7EB26D", - "3xx": "#6ED0E0", - "4xx": "#EF843C", - "5xx": "#E24D42", - "OK": "#7EB26D", - "cancel": "#A9A9A9", - "error": "#E24D42", - "success": "#7EB26D" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ ], - "nullPointMode": "null as zero", - 
"percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "QPS", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le,route) 
(cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3", - "format": "time_series", - "legendFormat": "{{ route }} 99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3", - "format": "time_series", - "legendFormat": "{{ route }} 50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) ", - "format": "time_series", - "legendFormat": "{{ route }} Average", - "refId": "C" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Latency", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "ms", - 
"label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval])) by (le,pod)) * 1e3", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "__auto", - "refId": "A", - "step": 10 - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Per Pod Latency (p99)", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": 
[ - { - "aliasColors": { - "1xx": "#EAB839", - "2xx": "#7EB26D", - "3xx": "#6ED0E0", - "4xx": "#EF843C", - "5xx": "#E24D42", - "OK": "#7EB26D", - "cancel": "#A9A9A9", - "error": "#E24D42", - "success": "#7EB26D" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "QPS", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": 
false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3", - "format": "time_series", - "legendFormat": "{{ route }} 99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3", - "format": "time_series", - "legendFormat": "{{ route }} 50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) ", - "format": "time_series", - "legendFormat": "{{ route }} Average", - "refId": "C" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Latency", - "tooltip": { - 
"shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval])) by (le,pod)) * 1e3", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "__auto", - "refId": "A", - "step": 10 - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Per Pod Latency (p99)", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - 
"repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - "1xx": "#EAB839", - "2xx": "#7EB26D", - "3xx": "#6ED0E0", - "4xx": "#EF843C", - "5xx": "#E24D42", - "OK": "#7EB26D", - "cancel": "#A9A9A9", - "error": "#E24D42", - "success": "#7EB26D" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "QPS", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - 
"aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 11, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3", - "format": "time_series", - "legendFormat": "{{ route }} 99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3", - "format": "time_series", - "legendFormat": "{{ route }} 50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", 
route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) ", - "format": "time_series", - "legendFormat": "{{ route }} Average", - "refId": "C" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Latency", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 12, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval])) by (le,pod)) * 1e3", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "__auto", - "refId": "A", - "step": 10 - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Per Pod Latency (p99)", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": 
"graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester - Zone Aware", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - "1xx": "#EAB839", - "2xx": "#7EB26D", - "3xx": "#6ED0E0", - "4xx": "#EF843C", - "5xx": "#E24D42", - "OK": "#7EB26D", - "cancel": "#A9A9A9", - "error": "#E24D42", - "success": "#7EB26D" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 13, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "QPS", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": 
[ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 14, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(loki_index_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval])) * 1e3 / sum(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Latency", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { 
- "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 15, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval])) by (le,pod)) * 1e3", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "__auto", - "refId": "A", - "step": 10 - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Per Pod Latency (p99)", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Index", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - "1xx": 
"#EAB839", - "2xx": "#7EB26D", - "3xx": "#6ED0E0", - "4xx": "#EF843C", - "5xx": "#E24D42", - "OK": "#7EB26D", - "cancel": "#A9A9A9", - "error": "#E24D42", - "success": "#7EB26D" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 16, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "QPS", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 17, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - 
"links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(loki_boltdb_shipper_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Latency", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - 
"id": 18, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval])) by (le,pod)) * 1e3", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "__auto", - "refId": "A", - "step": 10 - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Per Pod Latency (p99)", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "BoltDB Shipper", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "loki" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "cluster", - 
"multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(loki_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Loki / Reads", - "uid": "reads", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Loki Mixin - labels: - grafana_dashboard: "1" - name: loki-reads.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - loki-retention.json: |- - { - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "loki" - ], - "targetBlank": false, - "title": "Loki Dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - 
"total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "request", - "color": "#FFC000", - "fill": 0 - }, - { - "alias": "limit", - "color": "#E02F44", - "fill": 0 - } - ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\", resource=\"cpu\"} > 0)", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "CPU", - "tooltip": { - "sort": 2 - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - 
"lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "request", - "color": "#FFC000", - "fill": 0 - }, - { - "alias": "limit", - "color": "#E02F44", - "fill": 0 - } - ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\", resource=\"memory\"} > 0)", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Memory (workingset)", - "tooltip": { - "sort": 2 - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": 
false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/compactor\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Memory (go heap inuse)", - "tooltip": { - "sort": 2 - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Resource Usage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "fixedColor": "blue", - "mode": "fixed" - }, - "custom": { }, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "dateTimeFromNow" - } - }, - "fill": 1, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": { }, - "textMode": "auto" - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - 
"seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"} * 1e3", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Last Compact Tables Operation Success", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "loki_boltdb_shipper_compact_tables_operation_duration_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"}", - "format": "time_series", - "legendFormat": "duration", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Compact Tables Operations Duration", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { 
- "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Compaction", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(increase(loki_compactor_skipped_compacting_locked_table_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__range]))", - "format": "time_series", - "legendFormat": "{{table_name}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Number of times Tables were skipped during Compaction", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": 
false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (status)(rate(loki_boltdb_shipper_compact_tables_operation_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{success}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Compact Tables Operations Per Status", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "fixedColor": "blue", - "mode": "fixed" - }, - "custom": { }, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "dateTimeFromNow" - } - }, - "fill": 1, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "colorMode": "value", 
- "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": { }, - "textMode": "auto" - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "loki_compactor_apply_retention_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"} * 1e3", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Last Mark Operation Success", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "loki_compactor_apply_retention_operation_duration_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"}", - "format": "time_series", - "legendFormat": "duration", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - 
"title": "Mark Operations Duration", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (status)(rate(loki_compactor_apply_retention_operation_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{success}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Mark Operations Per Status", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Retention", - "titleSize": "h6" - }, - { - "collapse": false, - "height": 
"250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 11, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "count by(action)(loki_boltdb_shipper_retention_marker_table_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\"})", - "format": "time_series", - "legendFormat": "{{action}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Processed Tables Per Action", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 12, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "count 
by(table,action)(loki_boltdb_shipper_retention_marker_table_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\" , action=~\"modified|deleted\"})", - "format": "time_series", - "legendFormat": "{{table}}-{{action}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Modified Tables", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 13, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (table)(rate(loki_boltdb_shipper_retention_marker_count_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) >0", - "format": "time_series", - "legendFormat": "{{table}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Marks Creation Rate Per Table", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - 
"min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Per Table Marker", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 14, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum (increase(loki_boltdb_shipper_retention_marker_count_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[24h]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Marked Chunks (24h)", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 15, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 
1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Mark Table Latency", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - 
"aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 16, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum (increase(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[24h]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Delete Chunks (24h)", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 17, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, 
sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Delete Latency", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Sweeper", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 18, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, 
- "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "time() - (loki_boltdb_shipper_retention_sweeper_marker_file_processing_current_time{cluster=~\"$cluster\", namespace=~\"$namespace\"} > 0)", - "format": "time_series", - "legendFormat": "lag", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Sweeper Lag", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 19, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(loki_boltdb_shipper_retention_sweeper_marker_files_current{cluster=~\"$cluster\", namespace=~\"$namespace\"})", - "format": "time_series", - "legendFormat": "count", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Marks Files to Process", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - 
"xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 20, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (status)(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{status}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Delete Rate Per Status", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$loki_datasource", - "id": 21, - "span": 12, - "targets": [ - { - "expr": 
"{cluster=~\"$cluster\", job=~\"($namespace)/compactor\"}", - "refId": "A" - } - ], - "title": "Compactor Logs", - "type": "logs" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Logs", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "loki" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(loki_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "hide": 0, - "label": null, - "name": "loki_datasource", - "options": [ ], - "query": "loki", - "refresh": 1, - "regex": "", - "type": "datasource" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Loki / Retention", - "uid": 
"retention", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Loki Mixin - labels: - grafana_dashboard: "1" - name: loki-retention.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - loki-writes-resources.json: |- - { - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "loki" - ], - "targetBlank": false, - "title": "Loki Dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "request", - "color": "#FFC000", - "fill": 0 - }, - { - "alias": "limit", - "color": "#E02F44", - "fill": 0 - } - ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\", resource=\"cpu\"} > 0)", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", 
container=~\"distributor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "CPU", - "tooltip": { - "sort": 2 - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "request", - "color": "#FFC000", - "fill": 0 - }, - { - "alias": "limit", - "color": "#E02F44", - "fill": 0 - } - ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\", resource=\"memory\"} > 0)", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"} > 0)", - 
"format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Memory (workingset)", - "tooltip": { - "sort": 2 - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Memory (go heap inuse)", - "tooltip": { - "sort": 2 - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor", - "titleSize": "h6" - }, - { - "collapse": false, - 
"collapsed": false, - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { }, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(pod) (loki_ingester_memory_streams{cluster=~\"$cluster\", job=~\"($namespace)/ingester.*\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "In-memory streams", - "tooltip": { - "sort": 2 - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { }, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "request", - "color": "#FFC000", - "fill": 0 - }, - { - "alias": "limit", - "color": "#E02F44", - "fill": 0 - } - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - 
"targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", resource=\"cpu\"} > 0)", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "CPU", - "tooltip": { - "sort": 2 - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { }, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "request", - "color": "#FFC000", - "fill": 0 - }, - { - "alias": "limit", - "color": "#E02F44", - "fill": 0 - } - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max 
by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", resource=\"memory\"} > 0)", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Memory (workingset)", - "tooltip": { - "sort": 2 - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { }, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/ingester.*\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - 
"title": "Memory (go heap inuse)", - "tooltip": { - "sort": 2 - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "gridPos": { }, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Disk Writes", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - 
"aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "gridPos": { }, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Disk Reads", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { }, - "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, 
- "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"ingester.*.*\"})", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Disk Space Utilization", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6", - "type": "row" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "loki" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(loki_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - 
"tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Loki / Writes Resources", - "uid": "writes-resources", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Loki Mixin - labels: - grafana_dashboard: "1" - name: loki-writes-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - loki-writes.json: |- - { - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "loki" - ], - "targetBlank": false, - "title": "Loki Dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - "1xx": "#EAB839", - "2xx": "#7EB26D", - "3xx": "#6ED0E0", - "4xx": "#EF843C", - "5xx": "#E24D42", - "OK": "#7EB26D", - "cancel": "#A9A9A9", - "error": "#E24D42", - "success": "#7EB26D" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": 
false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "QPS", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) 
(cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"}) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Latency", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 3, - "legend": { - "avg": false, - 
"current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum (rate(loki_distributor_structured_metadata_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/distributor\",}[$__rate_interval])) / sum(rate(loki_distributor_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/distributor\",}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "bytes", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Per Total Received Bytes", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (tenant) 
(rate(loki_distributor_structured_metadata_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/distributor\",}[$__rate_interval])) / ignoring(tenant) group_left sum(rate(loki_distributor_structured_metadata_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/distributor\",}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{tenant}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Per Tenant", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": 1, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": 1, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor - Structured Metadata", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - "1xx": "#EAB839", - "2xx": "#7EB26D", - "3xx": "#6ED0E0", - "4xx": "#EF843C", - "5xx": "#E24D42", - "OK": "#7EB26D", - "cancel": "#A9A9A9", - "error": "#E24D42", - "success": "#7EB26D" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (status) (\n 
label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "QPS", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"})) * 1e3", - 
"format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"}) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Latency", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester - Zone Aware", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - "1xx": "#EAB839", - "2xx": "#7EB26D", - "3xx": "#6ED0E0", - "4xx": "#EF843C", - "5xx": "#E24D42", - "OK": "#7EB26D", - "cancel": "#A9A9A9", - "error": "#E24D42", - "success": "#7EB26D" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { 
- "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "QPS", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"})) * 
1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"}) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Latency", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - "1xx": "#EAB839", - "2xx": "#7EB26D", - "3xx": "#6ED0E0", - "4xx": "#EF843C", - "5xx": "#E24D42", - "OK": "#7EB26D", - "cancel": "#A9A9A9", - "error": "#E24D42", - "success": "#7EB26D" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by 
(status) (\n label_replace(label_replace(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "QPS", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval])) by (le)) * 1e3", - "format": 
"time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(loki_index_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval])) * 1e3 / sum(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Latency", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Index", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - "1xx": "#EAB839", - "2xx": "#7EB26D", - "3xx": "#6ED0E0", - "4xx": "#EF843C", - "5xx": "#E24D42", - "OK": "#7EB26D", - "cancel": "#A9A9A9", - "error": "#E24D42", - "success": "#7EB26D" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 11, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (status) (\n 
label_replace(label_replace(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "QPS", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 12, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval])) by (le)) * 1e3", - "format": 
"time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(loki_boltdb_shipper_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Latency", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "BoltDB Shipper", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "loki" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(loki_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - 
"datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Loki / Writes", - "uid": "writes", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Loki Mixin - labels: - grafana_dashboard: "1" - name: loki-writes.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-alertmanager-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" 
- }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": 
"request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" 
- } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alertmanager", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?alertmanager.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum 
by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?alertmanager.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"alertmanager\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - 
"unit": "Bps" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"alertmanager\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk reads", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Disk", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(alertmanager).*\"\n }\n)\n", - 
"format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Alertmanager resources", - "uid": "a6883fb22799ac74479c7db872451092", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - 
grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-alertmanager-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-alertmanager.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "100px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cluster_job_pod:cortex_alertmanager_alerts:sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Total alerts", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - 
{ - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cluster_job_pod:cortex_alertmanager_silences:sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Total silences", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - 
"stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max(cortex_alertmanager_tenants_discovered{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Tenants", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ 
- { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "QPS", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": 
"absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alertmanager Distributor", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - 
"spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "sum(cluster_job:cortex_alertmanager_alerts_received_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n-\nsum(cluster_job:cortex_alertmanager_alerts_invalid_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(cluster_job:cortex_alertmanager_alerts_invalid_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "APS", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alerts received", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - 
"steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "cortex_alertmanager_dispatcher_aggregation_groups{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "per pod Active Aggregation Groups", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alerts grouping", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(cluster_job_integration:cortex_alertmanager_notifications_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n-\nsum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", 
job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "NPS", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "(\nsum(cluster_job_integration:cortex_alertmanager_notifications_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}) by(integration)\n-\nsum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}) by(integration)\n) > 0\nor on () vector(0)\n", - "format": "time_series", - "legendFormat": "success - {{ integration }}", - "legendLink": null - }, - { - "expr": "sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}) by(integration)", - "format": "time_series", - "legendFormat": "failed - {{ integration }}", - "legendLink": null - } - ], - "title": "NPS by integration", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { 
- "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_alertmanager_notification_latency_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_alertmanager_notification_latency_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_alertmanager_notification_latency_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_alertmanager_notification_latency_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, 
- "title": "Alert notifications", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Error rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - 
"pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - 
"drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - 
"repeatRowId": null, - "showTitle": true, - "title": "Alertmanager Configuration Object Store (Alertmanager accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Get", - "type": "timeseries", - 
"yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": 
"Latency of op: GetRange", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": 
"Average", - "refId": "C" - } - ], - "title": "Latency of op: Upload", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval]))", - 
"format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (cortex_alertmanager_tenants_owned{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Per pod tenants", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - 
"expr": "sum by(pod) (cluster_job_pod:cortex_alertmanager_alerts:sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Per pod alerts", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (cluster_job_pod:cortex_alertmanager_silences:sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Per pod silences", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Replication", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": 
"successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_alertmanager_sync_configs_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_alertmanager_sync_configs_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_alertmanager_sync_configs_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Syncs/sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(reason) (rate(cortex_alertmanager_sync_configs_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{reason}}", - "legendLink": null - } - ], - "title": "Syncs/sec (by reason)", - "type": "timeseries" - }, - { - "datasource": 
"$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum (rate(cortex_alertmanager_ring_check_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "errors", - "legendLink": null - } - ], - "title": "Ring check errors/sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Tenant configuration sync", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(outcome) (rate(cortex_alertmanager_state_initial_sync_completed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "interval": "1m", - "legendFormat": "{{outcome}}", - "legendLink": null - } - ], - "title": "Initial syncs /sec", - 
"type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 26, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "interval": "1m", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "interval": "1m", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "interval": "1m", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Initial sync duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - 
"label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 27, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_alertmanager_state_fetch_replica_state_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_alertmanager_state_fetch_replica_state_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "interval": "1m", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_alertmanager_state_fetch_replica_state_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "interval": "1m", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Fetch state from other alertmanagers /sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Sharding initial 
state sync", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 28, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(cluster_job:cortex_alertmanager_state_replication_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n-\nsum(cluster_job:cortex_alertmanager_state_replication_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(cluster_job:cortex_alertmanager_state_replication_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Replicate state to other alertmanagers /sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - 
"spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 29, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(cluster_job:cortex_alertmanager_partial_state_merges_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n-\nsum(cluster_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(cluster_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Merge state from other alertmanagers /sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - 
"id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 30, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_alertmanager_state_persist_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_alertmanager_state_persist_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_alertmanager_state_persist_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Persist state to remote storage /sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Sharding runtime state sync", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": 
"label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Alertmanager", - "uid": "b0d38d318bbddd80476246d4930f9e55", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-alertmanager.json + name: agent-config-d78c7bkcmg namespace: monitoring-system --- apiVersion: v1 data: - mimir-compactor-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - 
"drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - 
"drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "CPU and memory", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 3, - "links": [ ], - "options": { - 
"legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (RSS)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - 
"targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?compactor.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - 
"showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?compactor.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"compactor\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - 
], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"compactor\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk reads", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(compactor).*\"\n }\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Disk", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - 
"6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Compactor resources", - "uid": "09a5c49e9cdb2f2b24c6d184574a07fd", - "version": 0 - } + MEMCACHED_SECRET_NAME: integrations-memcached + MYSQL_SECRET_NAME: integrations-mysql + REDIS_SECRET_NAME: integrations-redis + memcached.river: "/*\nModule: Memcached integrations\nDescription: Wrapper module + to integration Memcached metrics\n*/\nargument \"clustering\" {\n\t// comment + = \"Whether or not clustering should be enabled\"\n\toptional = true\n\tdefault + \ = true\n}\n\nargument \"name\" {\n\t// comment = \"Name of the secret for Memcached\"\n\toptional + = true\n\tdefault = \"integrations-memcached\"\n}\n\nargument \"namespace\" {\n\t// + comment = \"Namespace of the Memcached secret Integrations\"\n\toptional = true\n\tdefault + \ = \"default\"\n}\n\nargument \"instance\" {\n\t// comment = \"Instance of the + Memcached\"\n\toptional = true\n\tdefault = \"primary\"\n}\n\nargument \"forward_to\" + { }\n\nremote.kubernetes.secret \"memcached\" {\n\tname = argument.name.value\n\tnamespace + = argument.namespace.value\n}\n\n// Metrics\nprometheus.exporter.memcached \"integrations_memcached\" + {\n\taddress = nonsensitive(remote.kubernetes.secret.memcached.data[\"memcached-address\"])\n\ttimeout + = \"5s\"\n}\n\nprometheus.scrape \"memcached\" {\n\tclustering {\n\t\tenabled + = argument.clustering.value\n\t}\n\n\tenable_protobuf_negotiation = true\n\tscrape_classic_histograms + \ = true\n\n\ttargets = concat(\n\t\tprometheus.exporter.memcached.integrations_memcached.targets,\n\t)\n\tjob_name + \ = \"integrations/memcached\"\n\tforward_to = [prometheus.relabel.integrations_memcached.receiver]\n}\n\nprometheus.relabel + \"integrations_memcached\" {\n\trule {\n\t\treplacement = argument.instance.value\n\t\ttarget_label + = \"instance\"\n\t}\n\tforward_to = argument.forward_to.value\n}\n" + mysql.river: "/*\nModule: Mysql integrations\nDescription: Wrapper module to 
integration + mysql metrics\n*/\nargument \"clustering\" {\n\t// comment = \"Whether or not + clustering should be enabled\"\n\toptional = true\n\tdefault = true\n}\n\nargument + \"name\" {\n\t// comment = \"Name of the secret for MySQL\"\n\toptional = true\n\tdefault + \ = \"integrations-mysql\"\n}\n\nargument \"namespace\" {\n\t// comment = \"Namespace + of the MySQL secret Integrations\"\n\toptional = true\n\tdefault = \"default\"\n}\n\nargument + \"instance\" {\n\t// comment = \"Instance of the Database\"\n\toptional = true\n\tdefault + \ = \"primary\"\n}\n\nargument \"forward_to\" { }\n\nremote.kubernetes.secret + \"mysql\" {\n\tname = argument.name.value\n\tnamespace = argument.namespace.value\n}\n\n// + Metrics\nprometheus.exporter.mysql \"integrations_mysql\" {\n\tdata_source_name + = nonsensitive(remote.kubernetes.secret.mysql.data[\"mysql-username\"]) + \":\" + + nonsensitive(remote.kubernetes.secret.mysql.data[\"mysql-password\"]) + \"@(\" + + nonsensitive(remote.kubernetes.secret.mysql.data[\"mysql-host\"]) + \")/\"\n}\n\nprometheus.scrape + \"mysql\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\tenable_protobuf_negotiation + = true\n\tscrape_classic_histograms = true\n\n\ttargets = concat(\n\t\tprometheus.exporter.mysql.integrations_mysql.targets,\n\t)\n\tjob_name + \ = \"integrations/mysql\"\n\tforward_to = [prometheus.relabel.integrations_mysql.receiver]\n}\n\nprometheus.relabel + \"integrations_mysql\" {\n\trule {\n\t\treplacement = argument.instance.value\n\t\ttarget_label + = \"instance\"\n\t}\n\tforward_to = argument.forward_to.value\n}\n" + redis.river: "/*\nModule: Redis integrations\nDescription: Wrapper module to integration + Redis metrics\n*/\nargument \"clustering\" {\n\t// comment = \"Whether or not + clustering should be enabled\"\n\toptional = true\n\tdefault = true\n}\n\nargument + \"namespace\" {\n\t// comment = \"Namespace of the Redis secret Integrations\"\n\toptional + = true\n\tdefault = 
\"default\"\n}\n\nargument \"name\" {\n\t// comment = \"Name + of the secret for Redis\"\n\toptional = true\n\tdefault = \"integrations-redis\"\n}\n\nargument + \"instance\" {\n\t// comment = \"Instance of the Redis\"\n\toptional = true\n\tdefault + \ = \"master\"\n}\n\nargument \"forward_to\" { }\n\nremote.kubernetes.secret \"redis\" + {\n\tname = argument.name.value\n\tnamespace = argument.namespace.value\n}\n\n// + Metrics\nprometheus.exporter.redis \"integrations_redis\" {\n\tredis_addr = + nonsensitive(remote.kubernetes.secret.redis.data[\"redis-addr\"])\n\tredis_password + = nonsensitive(remote.kubernetes.secret.redis.data[\"redis-password\"])\n}\n\nprometheus.scrape + \"redis\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\tenable_protobuf_negotiation + = true\n\tscrape_classic_histograms = true\n\n\ttargets = concat(\n\t\tprometheus.exporter.redis.integrations_redis.targets,\n\t)\n\tjob_name + \ = \"integrations/redis\"\n\tforward_to = [prometheus.relabel.integrations_redis.receiver]\n}\n\nprometheus.relabel + \"integrations_redis\" {\n\trule {\n\t\treplacement = argument.instance.value\n\t\ttarget_label + = \"instance\"\n\t}\n\tforward_to = argument.forward_to.value\n}\n" kind: ConfigMap metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-compactor-resources.json + name: agent-integrations namespace: monitoring-system --- apiVersion: v1 data: - mimir-compactor.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - 
"collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Per-instance runs\nNumber of times a compactor instance triggers a compaction across all tenants that it manages.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "bars", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "completed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "started" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#34CCEB", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_compactor_runs_started_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "started", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_compactor_runs_completed_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "completed", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_compactor_runs_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": 
"time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Per-instance runs / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Tenants compaction progress\nIn a multi-tenant cluster, display the progress of tenants that are compacted while compaction is running.\n\n", - "fieldConfig": { - "defaults": { - "max": 1, - "noValue": 1, - "unit": "percentunit" - } - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "(\n cortex_compactor_tenants_processing_succeeded{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"} +\n cortex_compactor_tenants_processing_failed{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"} +\n cortex_compactor_tenants_skipped{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}\n)\n/\ncortex_compactor_tenants_discovered{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"} > 0\n", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Tenants compaction progress", - "type": "timeseries" - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Longest time since last successful run\nDisplays the amount of time since the most recent successful execution\nof the compactor.\nThe value shown will be for the compactor replica that has the longest time since its\nlast successful run.\nThe table to the right shows a summary for all compactor replicas.\n\nIf there is no time value, one of the following messages might appear:\n\n- If you see \"No compactor data\" in this panel, that means that no compactors are active yet.\n\n- If you see \"No successful runs\" in this panel, that 
means that compactors are active, but none\n of them were successfully executed yet.\n\nThese might be expected - for example, if you just recently restarted your compactors,\nthey might not have had a chance to complete their first compaction run.\nHowever, if these messages persist, you should check the health of your compactors.\n\n", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "No compactor data", - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Last run" - }, - "properties": [ - { - "id": "custom.width", - "value": 74 - }, - { - "id": "mappings", - "value": [ - { - "options": { - "from": "-Infinity", - "result": { - "color": "text", - "text": "No successful runs since startup yet" - }, - "to": 0 - }, - "type": "range" - } - ] - }, - { - "id": "color", - "value": { - "mode": "thresholds" - } - }, - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "yellow", - "value": 7200 - }, - { - "color": "orange", - "value": 21600 - }, - { - "color": "red", - "value": 43200 - } - ] - } - } - ] - } - ] - }, - "fill": 1, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "reduceOptions": { - "calcs": [ - "first" - ], - "fields": "/^Last run$/", - "values": false - }, - "textMode": "value" - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max by(pod)\n(\n (time() * (max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", 
job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[1h]) !=bool 0))\n -\n max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[1h])\n)\n", - "format": "table", - "instant": true, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Longest time since last successful run", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transformations": [ - { - "id": "organize", - "options": { - "renameByName": { - "Value": "Last run", - "pod": "Compactor" - } - } - }, - { - "id": "sortBy", - "options": { - "sort": [ - { - "desc": true, - "field": "Last run" - } - ] - } - } - ], - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Last successful run per-compactor replica\nDisplays the compactor replicas, and for each, shows how long it has been since\nits last successful compaction run.\n\nThe value in the status column is based on how long it has been since the last successful compaction.\n\n- Okay: less than 2 hours\n- Delayed: more than 2 hours\n- Late: more than 6 hours\n- Very late: more than 12 hours\n\nIf the status of any compactor replicas are *Late* or *Very late*, check their health.\n\n", - "fieldConfig": { - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Status" - }, - "properties": [ - { - "id": "custom.displayMode", - "value": "color-background" - }, - { - "id": "mappings", - "value": [ - { - "options": { - 
"from": "-Infinity", - "result": { - "color": "transparent", - "text": "N/A" - }, - "to": 0 - }, - "type": "range" - }, - { - "options": { - "from": 0, - "result": { - "color": "green", - "text": "Ok" - }, - "to": 7200 - }, - "type": "range" - }, - { - "options": { - "from": 7200, - "result": { - "color": "yellow", - "text": "Delayed" - }, - "to": 21600 - }, - "type": "range" - }, - { - "options": { - "from": 21600, - "result": { - "color": "orange", - "text": "Late" - }, - "to": 43200 - }, - "type": "range" - }, - { - "options": { - "from": 43200, - "result": { - "color": "red", - "text": "Very late" - }, - "to": "Infinity" - }, - "type": "range" - }, - { - "options": { - "match": "null+nan", - "result": { - "color": "transparent", - "text": "Unknown" - } - }, - "type": "special" - } - ] - }, - { - "id": "custom.width", - "value": 86 - }, - { - "id": "custom.align", - "value": "center" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Last run" - }, - "properties": [ - { - "id": "unit", - "value": "s" - }, - { - "id": "custom.width", - "value": 74 - }, - { - "id": "mappings", - "value": [ - { - "options": { - "from": "-Infinity", - "result": { - "text": "Never" - }, - "to": 0 - }, - "type": "range" - } - ] - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "max by(pod)\n(\n (time() * (max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[1h]) !=bool 0))\n -\n max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[1h])\n)\n", - "format": "table", - "instant": true, - "legendFormat": "Last run", - "legendLink": null - } - ], - "title": "Last successful run per-compactor replica", - 
"transformations": [ - { - "id": "organize", - "options": { - "renameByName": { - "Value": "Last run", - "pod": "Compactor" - } - } - }, - { - "id": "sortBy", - "options": { - "sort": [ - { - "desc": true, - "field": "Last run" - } - ] - } - }, - { - "id": "calculateField", - "options": { - "alias": "One", - "binary": { - "left": "Last run", - "operator": "/", - "right": "Last run" - }, - "mode": "binary", - "replaceFields": false - } - }, - { - "id": "calculateField", - "options": { - "alias": "Status", - "binary": { - "left": "Last run", - "operator": "*", - "right": "One" - }, - "mode": "binary", - "replaceFields": false - } - }, - { - "id": "filterFieldsByName", - "options": { - "include": { - "names": [ - "Compactor", - "Last run", - "Status" - ] - } - } - } - ], - "type": "table" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Summary", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Estimated Compaction Jobs\nEstimated number of compaction jobs based on latest version of bucket index. Ingesters upload new blocks every 2 hours (shortly after 01:00 UTC, 03:00 UTC, 05:00 UTC, etc.),\nand compactors should process all of them within 2h interval. If this graph regularly goes to zero (or close to zero) in 2 hour intervals, then compaction works as designed.\n\nMetric with number of compaction jobs is computed from blocks in bucket index, which is updated regularly. Metric doesn't change between bucket index updates, even if\nthere were compaction jobs finished in this time. When computing compaction jobs, only jobs that can be executed at given moment are counted. There can be more\njobs, but if they are blocked, they are not counted in the metric. For example if there is a split compaction job pending for some time range, no merge job\ncovering the same time range can run. 
In this case only split compaction job is counted toward the metric, but merge job isn't.\n\nIn other words, computed number of compaction jobs is the minimum number of compaction jobs based on latest version of bucket index.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(cortex_bucket_index_estimated_compaction_jobs{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}) and (sum(rate(cortex_bucket_index_estimated_compaction_jobs_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) == 0)", - "format": "time_series", - "legendFormat": "Jobs", - "legendLink": null - } - ], - "title": "Estimated Compaction Jobs", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### TSDB compactions / sec\nRate of TSDB compactions. 
Single TSDB compaction takes one or more input blocks and produces one or more (during \"split\" phase) output blocks.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(prometheus_tsdb_compactions_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "compactions", - "legendLink": null - } - ], - "title": "TSDB compactions / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### TSDB compaction duration\nDisplay the amount of time that it has taken to run a single TSDB compaction.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(prometheus_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": 
"A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(prometheus_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(prometheus_tsdb_compaction_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(prometheus_tsdb_compaction_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "TSDB compaction duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "avg(max by(user) (cortex_bucket_blocks_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}))", - "format": "time_series", - 
"legendFormat": "avg", - "legendLink": null - } - ], - "title": "Average blocks / tenant", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Tenants with largest number of blocks\nThe 10 tenants with the largest number of blocks.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "topk(10, max by(user) (cortex_bucket_blocks_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "Tenants with largest number of blocks", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_compactor_blocks_marked_for_deletion_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "blocks", - "legendLink": null - } - ], - "title": "Blocks marked for deletion / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_compactor_blocks_cleaned_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_compactor_block_cleanup_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Blocks deletions / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Garbage collector", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - 
"datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_compactor_meta_syncs_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_compactor_meta_sync_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_compactor_meta_sync_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Metadata syncs / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": 
[ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_compactor_meta_sync_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_compactor_meta_sync_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_compactor_meta_sync_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_compactor_meta_sync_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Metadata sync duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Metadata sync", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - 
"showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Error rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - 
"legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "nullPointMode": "null as zero", - 
"options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Object Store", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": 
"none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Get", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 
0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: GetRange", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - 
"min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Upload", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, 
- "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 21, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - 
"drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": 
"single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval])) * 1e3 / 
sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Key-value store for compactors ring", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", 
- "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Compactor", - "uid": "1b3443aea86db629e6efdb7d05c53823", - "version": 0 - } + integrations.river: "/*\nModule: Agent integrations\nDescription: Wrapper module + to include auto loading integrations\n*/\nargument \"name\" {\n\t// comment = + \"Name of the integrations config\"\n\toptional = true\n\tdefault = \"agent-integrations\"\n}\n\nargument + \"namespace\" {\n\t// comment = \"Namespace of the integrations config\"\n\toptional + = true\n\tdefault = \"default\"\n}\n\nargument \"forward_to\" { }\n\nremote.kubernetes.configmap + \"integrations\" {\n\tname = argument.name.value\n\tnamespace = argument.namespace.value\n}\n\n/********************************************\n + * Integrations Mysql\n ********************************************/\nmodule.string + \"mysql\" {\n\tcontent = remote.kubernetes.configmap.integrations.data[\"mysql.river\"]\n\n\targuments + {\n\t\tnamespace = argument.namespace.value\n\t\tname = remote.kubernetes.configmap.integrations.data[\"MYSQL_SECRET_NAME\"]\n\t\tinstance + \ = \"primary\"\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n\n/********************************************\n + * Integrations Memcached\n ********************************************/\nmodule.string + \"memcached\" {\n\tcontent = remote.kubernetes.configmap.integrations.data[\"memcached.river\"]\n\n\targuments + {\n\t\tnamespace = argument.namespace.value\n\t\tname = remote.kubernetes.configmap.integrations.data[\"MEMCACHED_SECRET_NAME\"]\n\t\tinstance + \ = \"primary\"\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n\n/********************************************\n + * Integrations Redis\n ********************************************/\nmodule.string + \"redis\" 
{\n\tcontent = remote.kubernetes.configmap.integrations.data[\"redis.river\"]\n\n\targuments + {\n\t\tnamespace = argument.namespace.value\n\t\tname = remote.kubernetes.configmap.integrations.data[\"REDIS_SECRET_NAME\"]\n\t\tinstance + \ = \"master\"\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n" + lgtmp.river: "/********************************************\n * ARGUMENTS\n ********************************************/\nargument + \"cluster\" {\n\toptional = true\n\tdefault = \"k3d-k3s-codelab\"\n}\n\nargument + \"tenant\" {\n\toptional = true\n\tdefault = \"anonymous\"\n}\n\nargument \"metrics_endpoint\" + {\n\toptional = true\n\tdefault = \"http://mimir:8080\"\n\t//comment = \"Where + to send collected metrics.\"\n}\n\nargument \"logs_endpoint\" {\n\toptional = + true\n\tdefault = \"http://loki:3100\"\n\t//comment = \"Where to send collected + logs.\"\n}\n\nargument \"traces_endpoint\" {\n\toptional = true\n\tdefault = + \"tempo:4317\"\n\t//comment = \"Where to send collected traces.\"\n}\n\nargument + \"profiles_endpoint\" {\n\toptional = true\n\tdefault = \"http://pyroscope:4040\"\n\t//comment + \ = \"Where to send collected profiles.\"\n}\n\n/********************************************\n + * EXPORTS\n ********************************************/\n\nexport \"metrics_receiver\" + {\n\tvalue = prometheus.remote_write.mimir.receiver\n}\n\nexport \"logs_receiver\" + {\n\tvalue = loki.write.loki.receiver\n}\n\nexport \"traces_receiver\" {\n\tvalue + = otelcol.exporter.otlp.tempo.input\n}\n\nexport \"profiles_receiver\" {\n\tvalue + = pyroscope.write.pyroscope.receiver\n}\n\n/********************************************\n + * Endpoints\n ********************************************/\n\n// Metrics\nprometheus.remote_write + \"mimir\" {\n\tendpoint {\n\t\turl = argument.metrics_endpoint.value + + \"/api/v1/push\"\n\t\tsend_native_histograms = true\n\t}\n\n\texternal_labels + = {\n\t\t\"scraped_by\" = \"grafana-agent\",\n\t\t\"cluster\" = 
argument.cluster.value,\n\t}\n}\n\n// + Logs\nloki.write \"loki\" {\n\tendpoint {\n\t\turl = argument.logs_endpoint.value + + \"/loki/api/v1/push\"\n\t\ttenant_id = argument.tenant.value\n\t}\n\n\texternal_labels + = {\n\t\t\"scraped_by\" = \"grafana-agent\",\n\t\t\"cluster\" = argument.cluster.value,\n\t}\n}\n\n// + Traces\notelcol.exporter.otlp \"tempo\" {\n\tclient {\n\t\tendpoint = argument.traces_endpoint.value\n\n\t\ttls + {\n\t\t\tinsecure = true\n\t\t\tinsecure_skip_verify = true\n\t\t}\n\t}\n}\n\n// + Profiles\npyroscope.write \"pyroscope\" {\n\tendpoint {\n\t\turl = argument.profiles_endpoint.value\n\t}\n\n\texternal_labels + = {\n\t\t\"scraped_by\" = \"grafana-agent\",\n\t\t\"cluster\" = argument.cluster.value,\n\t}\n}\n" + logs.river: "/*\nModule: logs\nDescription: Wrapper module to include all kubernetes + logging modules and use cri parsing\n*/\nargument \"forward_to\" {\n\t// comment + = \"Must be a list(LogsReceiver) where collected logs should be forwarded to\"\n\toptional + = false\n}\n\nargument \"tenant\" {\n\t// comment = \"The tenant to filter logs + to. 
This does not have to be the tenantId, this is the value to look for in the + logs.agent.grafana.com/tenant annotation, and this can be a regex.\"\n\toptional + = true\n\tdefault = \".*\"\n}\n\nargument \"keep_labels\" {\n\t// comment = \"List + of labels to keep before the log message is written to Loki\"\n\toptional = true\n\tdefault + \ = [\n\t\t\"app\",\n\t\t\"cluster\",\n\t\t\"component\",\n\t\t\"container\",\n\t\t\"deployment\",\n\t\t\"env\",\n\t\t\"filename\",\n\t\t\"instance\",\n\t\t\"job\",\n\t\t\"level\",\n\t\t\"log_type\",\n\t\t\"namespace\",\n\t\t\"region\",\n\t\t\"service\",\n\t\t\"squad\",\n\t\t\"team\",\n\t]\n}\n\nargument + \"git_repo\" {\n\toptional = true\n\tdefault = coalesce(env(\"GIT_REPO\"), \"https://github.com/grafana/agent-modules.git\")\n}\n\nargument + \"git_rev\" {\n\toptional = true\n\tdefault = coalesce(env(\"GIT_REV\"), env(\"GIT_REVISION\"), + env(\"GIT_BRANCH\"), \"main\")\n}\n\nargument \"git_pull_freq\" {\n\t// comment + = \"How often to pull the git repo, the default is 0s which means never pull\"\n\toptional + = true\n\tdefault = \"0s\"\n}\n\nmodule.git \"log_targets\" {\n\trepository = + argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/targets/logs-from-worker.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.log_formats_all.exports.process.receiver]\n\t\ttenant + \ = argument.tenant.value\n\t\tgit_repo = argument.git_repo.value\n\t\tgit_rev + \ = argument.git_rev.value\n\t\tgit_pull_freq = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git + \"log_formats_all\" {\n\trepository = argument.git_repo.value\n\trevision + \ = argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/log-formats/all.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.log_level_default.exports.process.receiver]\n\t\tgit_repo + \ = argument.git_repo.value\n\t\tgit_rev = 
argument.git_rev.value\n\t\tgit_pull_freq + = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git \"log_level_default\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/labels/log-level.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.label_normalize_filename.exports.process.receiver]\n\t}\n}\n\nmodule.git + \"label_normalize_filename\" {\n\trepository = argument.git_repo.value\n\trevision + \ = argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/labels/normalize-filename.river\"\n\n\targuments + {\n\t\t// here we fork, one branch goes to the log level module, the other goes + to the metrics module\n\t\t// this is because we need to reduce the labels on + the pre-metrics but they are still necessary in\n\t\t// downstream modules\n\t\tforward_to + = [\n\t\t\tmodule.git.pre_process_metrics.exports.process.receiver,\n\t\t\tmodule.git.drop_levels.exports.process.receiver,\n\t\t]\n\t}\n}\n\nmodule.git + \"pre_process_metrics\" {\n\trepository = argument.git_repo.value\n\trevision + \ = argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/metrics/pre-process-bytes-lines.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.drop_levels.exports.process.receiver]\n\t\tkeep_labels + = argument.keep_labels.value\n\t}\n}\n\nmodule.git \"drop_levels\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/drops/levels.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.scrub_all.exports.process.receiver]\n\t\tgit_repo + \ = argument.git_repo.value\n\t\tgit_rev = argument.git_rev.value\n\t\tgit_pull_freq + = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git \"scrub_all\" {\n\trepository + \ = 
argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/scrubs/all.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.embed_pod.exports.process.receiver]\n\t\tgit_repo + \ = argument.git_repo.value\n\t\tgit_rev = argument.git_rev.value\n\t\tgit_pull_freq + = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git \"embed_pod\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/embed/pod.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.mask_all.exports.process.receiver]\n\t}\n}\n\nmodule.git + \"mask_all\" {\n\trepository = argument.git_repo.value\n\trevision = + argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/masks/all.river\"\n\n\targuments {\n\t\tforward_to + \ = [module.git.label_keep.exports.process.receiver]\n\t\tgit_repo = argument.git_repo.value\n\t\tgit_rev + \ = argument.git_rev.value\n\t\tgit_pull_freq = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git + \"label_keep\" {\n\trepository = argument.git_repo.value\n\trevision = + argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/labels/keep-labels.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.post_process_metrics.exports.process.receiver]\n\t\tkeep_labels + = argument.keep_labels.value\n\t}\n}\n\nmodule.git \"post_process_metrics\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/metrics/post-process-bytes-lines.river\"\n\n\targuments + {\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n" + metrics.river: "/*\nModule: metrics-all\nDescription: Wrapper module to include + all kubernetes metric modules and use cri parsing\n*/\nargument 
\"forward_to\" + {\n\t// comment = \"Must be a list(MetricssReceiver) where collected logs should + be forwarded to\"\n\toptional = false\n}\n\nargument \"clustering\" {\n\t// comment + = \"Whether or not clustering should be enabled\"\n\toptional = true\n\tdefault + \ = false\n}\n\n/********************************************\n * Kubernetes Auto + Scrape ServiceMonitor\n ********************************************/\nprometheus.operator.servicemonitors + \"auto_scrape_servicemonitors\" {\n\tforward_to = argument.forward_to.value\n\n\tclustering + {\n\t\tenabled = argument.clustering.value\n\t}\n}\n\n/********************************************\n + * Kubernetes Auto Scrape PodMonitors\n ********************************************/\nprometheus.operator.podmonitors + \"auto_scrape_podmonitors\" {\n\tforward_to = argument.forward_to.value\n\n\tclustering + {\n\t\tenabled = argument.clustering.value\n\t}\n\n\tselector {\n\t\tmatch_expression + {\n\t\t\tkey = \"team\"\n\t\t\toperator = \"In\"\n\t\t\tvalues = [\"team-infra\"]\n\t\t}\n\t}\n}\n\n/********************************************\n + * Kubernetes Prometheus Rules To Mimir\n ********************************************/\nmimir.rules.kubernetes + \"prometheus_rules_to_mimir\" {\n\taddress = coalesce(env(\"METRICS_ENDPOINT\"), + \"http://nginx.gateway.svc:8080\")\n\ttenant_id = \"anonymous\"\n}\n" + profiles.river: "/*\nModule: profiles\nDescription: Wrapper module to include all + kubernetes profile modules and use cri parsing\n*/\nargument \"forward_to\" {\n\t// + comment = \"Must be a list(ProfilessReceiver) where collected logs should be forwarded + to\"\n\toptional = false\n}\n\nargument \"clustering\" {\n\t// comment = \"Whether + or not clustering should be enabled\"\n\toptional = true\n\tdefault = false\n}\n\ndiscovery.kubernetes + \"pyroscope_kubernetes\" {\n\trole = \"pod\"\n}\n\n// The default scrape config + allows to define annotations based scraping.\n//\n// For example the following + 
annotations:\n//\n// ```\n// profiles.grafana.com/memory.scrape: \"true\"\n// + profiles.grafana.com/memory.port: \"8080\"\n// profiles.grafana.com/cpu.scrape: + \"true\"\n// profiles.grafana.com/cpu.port: \"8080\"\n// profiles.grafana.com/goroutine.scrape: + \"true\"\n// profiles.grafana.com/goroutine.port: \"8080\"\n// ```\n//\n// will + scrape the `memory`, `cpu` and `goroutine` profiles from the `8080` port of the + pod.\n//\n// For more information see https://grafana.com/docs/phlare/latest/operators-guide/deploy-kubernetes/#optional-scrape-your-own-workloads-profiles\ndiscovery.relabel + \"kubernetes_pods\" {\n\ttargets = concat(discovery.kubernetes.pyroscope_kubernetes.targets)\n\n\trule + {\n\t\taction = \"drop\"\n\t\tsource_labels = [\"__meta_kubernetes_pod_phase\"]\n\t\tregex + \ = \"Pending|Succeeded|Failed|Completed\"\n\t}\n\n\trule {\n\t\taction + = \"labelmap\"\n\t\tregex = \"__meta_kubernetes_pod_label_(.+)\"\n\t}\n\n\trule + {\n\t\taction = \"replace\"\n\t\tsource_labels = [\"__meta_kubernetes_namespace\"]\n\t\ttarget_label + \ = \"namespace\"\n\t}\n\n\trule {\n\t\taction = \"replace\"\n\t\tsource_labels + = [\"__meta_kubernetes_pod_name\"]\n\t\ttarget_label = \"pod\"\n\t}\n\n\trule + {\n\t\taction = \"replace\"\n\t\tsource_labels = [\"__meta_kubernetes_pod_container_name\"]\n\t\ttarget_label + \ = \"container\"\n\t}\n}\n\ndiscovery.relabel \"kubernetes_pods_memory_default_name\" + {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = 
\"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_memory_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = 
\"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Memory\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_memory\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_memory_default_name.output, discovery.relabel.kubernetes_pods_memory_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = true\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_cpu_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = 
\"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_cpu_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape CPU\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_cpu\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_cpu_default_name.output, discovery.relabel.kubernetes_pods_cpu_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.goroutine + 
{\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_goroutine_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_goroutine_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = 
\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Goroutine\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_goroutine\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_goroutine_default_name.output, + discovery.relabel.kubernetes_pods_goroutine_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_block_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule 
{\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_block_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + 
\ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Block\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_block\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_block_default_name.output, discovery.relabel.kubernetes_pods_block_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_mutex_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = 
\"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_mutex_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Mutex\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_mutex\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = 
concat(discovery.relabel.kubernetes_pods_mutex_default_name.output, discovery.relabel.kubernetes_pods_mutex_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_fgprof_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_fgprof_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scrape\"]\n\t\taction + \ = 
\"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Fgprof\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_fgprof\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_fgprof_default_name.output, discovery.relabel.kubernetes_pods_fgprof_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = true\n\t\t}\n\t}\n}\n" + traces.river: "/*\nModule: 
traces\n*/\n\n/********************************************\n + * ARGUMENTS\n ********************************************/\nargument \"traces_forward_to\" + {\n\toptional = false\n}\n\nargument \"logs_forward_to\" {\n\toptional = false\n}\n\nargument + \"metrics_forward_to\" {\n\toptional = false\n}\n\nargument \"cluster\" {\n\toptional + = true\n\tdefault = \"k3d-k3s-codelab\"\n}\n\nargument \"otlp_http_endpoint\" + {\n\toptional = true\n\tdefault = \"0.0.0.0:4318\"\n}\n\nargument \"otlp_grpc_endpoint\" + {\n\toptional = true\n\tdefault = \"0.0.0.0:4317\"\n}\n\n/********************************************\n + * EXPORTS\n ********************************************/\nexport \"agent_traces_input\" + {\n\tvalue = otelcol.processor.batch.default.input\n}\n\n/********************************************\n + * Jaeger for Metrics Logs Traces\n ********************************************/\n\notelcol.receiver.jaeger + \"default\" {\n\tprotocols {\n\t\tgrpc {\n\t\t\tendpoint = \"0.0.0.0:14250\"\n\t\t}\n\n\t\tthrift_http + {\n\t\t\tendpoint = \"0.0.0.0:14268\"\n\t\t}\n\n\t\tthrift_binary {\n\t\t\tendpoint + = \"0.0.0.0:6832\"\n\t\t}\n\n\t\tthrift_compact {\n\t\t\tendpoint = \"0.0.0.0:6831\"\n\t\t}\n\t}\n\n\toutput + {\n\t\tmetrics = [otelcol.processor.batch.default.input]\n\t\tlogs = [otelcol.processor.resourcedetection.default.input]\n\t\ttraces + \ = [otelcol.processor.resourcedetection.default.input]\n\t}\n}\n\n/********************************************\n + * Otelcol for Metrics Logs Traces\n ********************************************/\n// + https://grafana.com/docs/agent/latest/flow/reference/components/otelcol.receiver.otlp/\notelcol.receiver.otlp + \"default\" {\n\tgrpc {\n\t\tendpoint = argument.otlp_grpc_endpoint.value\n\t}\n\n\thttp + {\n\t\tendpoint = argument.otlp_http_endpoint.value\n\t}\n\n\toutput {\n\t\tmetrics + = [otelcol.processor.batch.default.input]\n\t\tlogs = [otelcol.processor.resourcedetection.default.input]\n\t\ttraces + \ = 
[\n\t\t\totelcol.processor.resourcedetection.default.input,\n\t\t\totelcol.connector.spanlogs.autologging.input,\n\t\t]\n\t}\n}\n\notelcol.processor.resourcedetection + \"default\" {\n\tdetectors = [\"env\"]\n\n\toutput {\n\t\tlogs = [otelcol.processor.k8sattributes.default.input]\n\t\ttraces + = [otelcol.processor.k8sattributes.default.input]\n\t}\n}\n\notelcol.processor.k8sattributes + \"default\" {\n\textract {\n\t\tmetadata = [\n\t\t\t\"k8s.namespace.name\",\n\t\t\t\"k8s.pod.name\",\n\t\t\t\"k8s.deployment.name\",\n\t\t\t\"k8s.statefulset.name\",\n\t\t\t\"k8s.daemonset.name\",\n\t\t\t\"k8s.cronjob.name\",\n\t\t\t\"k8s.job.name\",\n\t\t\t\"k8s.node.name\",\n\t\t\t\"k8s.pod.uid\",\n\t\t\t\"k8s.pod.start_time\",\n\t\t]\n\t}\n\n\tpod_association + {\n\t\tsource {\n\t\t\tfrom = \"connection\"\n\t\t}\n\t}\n\n\toutput {\n\t\tlogs + \ = [otelcol.processor.transform.add_resource_attributes.input]\n\t\ttraces = + [otelcol.processor.transform.add_resource_attributes.input]\n\t}\n}\n\notelcol.processor.transform + \"add_resource_attributes\" {\n\terror_mode = \"ignore\"\n\n\tlog_statements {\n\t\tcontext + \ = \"resource\"\n\t\tstatements = [\n\t\t\t`set(attributes[\"pod\"], attributes[\"k8s.pod.name\"])`,\n\t\t\t`set(attributes[\"namespace\"], + attributes[\"k8s.namespace.name\"])`,\n\t\t\t`set(attributes[\"loki.resource.labels\"], + \"pod, namespace, cluster, job\")`,\n\t\t\t`set(attributes[\"k8s.cluster.name\"], + \"k3d-k3s-codelab\") where attributes[\"k8s.cluster.name\"] == nil`,\n\t\t]\n\t}\n\n\ttrace_statements + {\n\t\tcontext = \"resource\"\n\t\tstatements = [\n\t\t\t`set(attributes[\"k8s.cluster.name\"], + \"k3d-k3s-codelab\") where attributes[\"k8s.cluster.name\"] == nil`,\n\t\t]\n\t}\n\n\toutput + {\n\t\tlogs = [otelcol.processor.filter.default.input]\n\t\ttraces = [otelcol.processor.filter.default.input]\n\t}\n}\n\notelcol.processor.filter + \"default\" {\n\terror_mode = \"ignore\"\n\n\toutput {\n\t\tlogs = [otelcol.processor.batch.default.input]\n\t\ttraces + 
= [otelcol.processor.batch.default.input]\n\t}\n}\n\notelcol.processor.batch \"default\" + {\n\tsend_batch_size = 16384\n\tsend_batch_max_size = 0\n\ttimeout = + \"5s\"\n\n\toutput {\n\t\tmetrics = [otelcol.processor.memory_limiter.default.input]\n\t\tlogs + \ = [otelcol.processor.memory_limiter.default.input]\n\t\ttraces = [otelcol.processor.memory_limiter.default.input]\n\t}\n}\n\notelcol.processor.memory_limiter + \"default\" {\n\tcheck_interval = \"1s\"\n\tlimit_percentage = 50\n\tspike_limit_percentage + = 30\n\n\toutput {\n\t\tmetrics = [otelcol.exporter.prometheus.tracesmetrics.input]\n\t\tlogs + \ = [otelcol.exporter.loki.traceslogs.input]\n\t\ttraces = argument.traces_forward_to.value\n\t}\n}\n\notelcol.exporter.prometheus + \"tracesmetrics\" {\n\tforward_to = argument.metrics_forward_to.value\n}\n\notelcol.exporter.loki + \"traceslogs\" {\n\tforward_to = [loki.process.traceslogs.receiver]\n}\n\n// The + OpenTelemetry spanlog connector processes incoming trace spans and extracts data + from them ready\n// for logging.\notelcol.connector.spanlogs \"autologging\" {\n\t// + We only want to output a line for each root span (ie. 
every single trace), and + not for every\n\t// process or span (outputting a line for every span would be + extremely verbose).\n\tspans = false\n\troots = true\n\tprocesses = false\n\n\t// + We want to ensure that the following three span attributes are included in the + log line, if present.\n\tspan_attributes = [\n\t\t\"http.method\",\n\t\t\"http.target\",\n\t\t\"http.status_code\",\n\t]\n\n\t// + Overrides the default key in the log line to be `traceId`, which is then used + by Grafana to\n\t// identify the trace ID for correlation with the Tempo datasource.\n\toverrides + {\n\t\ttrace_id_key = \"traceId\"\n\t}\n\n\t// Send to the OpenTelemetry Loki + exporter.\n\toutput {\n\t\tlogs = [otelcol.exporter.loki.autologging.input]\n\t}\n}\n\n// + Simply forwards the incoming OpenTelemetry log format out as a Loki log.\n// We + need this stage to ensure we can then process the logline as a Loki object.\notelcol.exporter.loki + \"autologging\" {\n\tforward_to = [loki.process.autologging.receiver]\n}\n\n// + The Loki processor allows us to accept a correctly formatted Loki log and mutate + it into\n// a set of fields for output.\nloki.process \"autologging\" {\n\t// + The JSON stage simply extracts the `body` (the actual logline) from the Loki log, + ignoring\n\t// all other fields.\n\tstage.json {\n\t\texpressions = {\"body\" + = \"\"}\n\t}\n\t// The output stage takes the body (the main logline) and uses + this as the source for the output\n\t// logline. 
In this case, it essentially + turns it into logfmt.\n\tstage.output {\n\t\tsource = \"body\"\n\t}\n\n\tforward_to + = [loki.process.traceslogs.receiver]\n}\n\nloki.process \"traceslogs\" {\n\tstage.tenant + {\n\t\tvalue = \"anonymous\"\n\t}\n\n\tforward_to = argument.logs_forward_to.value\n}\n" kind: ConfigMap metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-compactor.json + name: agent-modules-cf8t5bf7t9 namespace: monitoring-system --- apiVersion: v1 @@ -29659,33175 +1233,6 @@ metadata: namespace: monitoring-system --- apiVersion: v1 -data: - mimir-config.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "instances" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "count(cortex_config_hash{cluster=~\"$cluster\", namespace=~\"$namespace\"}) by (sha256)", - "format": "time_series", - "legendFormat": "sha256:{{sha256}}", - "legendLink": null - } - ], - "title": "Startup config file 
hashes", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Startup config file", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "instances" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "count(cortex_runtime_config_hash{cluster=~\"$cluster\", namespace=~\"$namespace\"}) by (sha256)", - "format": "time_series", - "legendFormat": "sha256:{{sha256}}", - "legendLink": null - } - ], - "title": "Runtime config file hashes", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Runtime config file", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": 
"query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Config", - "uid": "5d9d0b4724c0f80d68467088ec61e003", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-config.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-object-store.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - 
"thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(component) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{component}}", - "legendLink": null - } - ], - "title": "RPS / component", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(component) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) / sum by(component) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{component}}", - "legendLink": null - } - ], - "title": "Error rate / component", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Components", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { 
- "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "RPS / operation", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Error rate / operation", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Operations", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": 
"histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: Get", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", 
namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: GetRange", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": 
"time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", 
namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": 
"time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: Upload", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, 
sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - 
"hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Object Store", - "uid": "e1324ee2a434f4158c00a9ee279d3292", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-object-store.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-overrides.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "${datasource}", - "id": 1, - "span": 12, - "targets": [ - { - "expr": "max by(limit_name) (cortex_limits_defaults{cluster=~\"$cluster\",namespace=~\"$namespace\"})", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "title": "Defaults", - "transformations": [ - { - "id": "labelsToFields", - "options": { } - }, - { - "id": "merge", - "options": { } - }, - { - "id": "organize", - "options": { - "excludeByName": { 
- "Time": true - }, - "indexByName": { - "Value": 1, - "limit_name": 0 - } - } - }, - { - "id": "sortBy", - "options": { - "fields": { }, - "sort": [ - { - "field": "limit_name" - } - ] - } - } - ], - "type": "table" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "${datasource}", - "id": 2, - "span": 12, - "targets": [ - { - "expr": "max by(user, limit_name) (cortex_limits_overrides{cluster=~\"$cluster\",namespace=~\"$namespace\",user=~\"${tenant_id}\"})", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "title": "Per-tenant overrides", - "transformations": [ - { - "id": "labelsToFields", - "options": { - "mode": "columns", - "valueLabel": "limit_name" - } - }, - { - "id": "merge", - "options": { } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true - }, - "indexByName": { - "user": 0 - } - } - } - ], - "type": "table" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": 
"prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "current": { - "selected": true, - "text": ".*", - "value": ".*" - }, - "hide": 0, - "label": "Tenant ID", - "name": "tenant_id", - "options": [ - { - "selected": true, - "text": ".*", - "value": ".*" - } - ], - "query": ".*", - "type": "textbox" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Overrides", - "uid": "1e2c358600ac53f09faea133f811b5bb", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-overrides.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-overview-networking.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - 
"fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": 
"none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": 
"min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Writes", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - 
{ - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - 
"overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Reads", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Backend", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": 
"Mimir / Overview networking", - "uid": "e15c71d372cc541367a088f10d9fcd92", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-overview-networking.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-overview-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - 
"stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Writes", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - 
"min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"distributor|ingester|mimir-write\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"distributor|ingester|mimir-write\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - 
"title": "Disk reads", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(distributor|ingester|mimir-write).*\"\n }\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - 
"expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Reads", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max 
by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Backend", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - 
"tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk reads", - "type": 
"timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"\n }\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": 
"label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Overview resources", - "uid": "a9b92d3c4d1af325d872a9e9a7083d71", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-overview-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-overview.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "content": "The 'Status' panel shows an overview on the cluster health over the time.\nTo investigate failures, see a specific 
dashboard:\n\n- Writes\n- Reads\n- Rule evaluations\n- Alerting notifications\n- Object storage\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 3, - "title": "", - "transparent": true, - "type": "text" - }, - { - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#7EB26D", - "value": null - }, - { - "color": "#EAB839", - "value": 0.01 - }, - { - "color": "#E24D42", - "value": 0.050000000000000003 - } - ] - } - } - }, - "id": 2, - "options": { - "showValue": "never" - }, - "span": 6, - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "exemplar": false, - "expr": "(\n # gRPC errors are not tracked as 5xx but \"error\".\n sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"5.*|error\"}[$__rate_interval]))\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", - "instant": false, - "legendFormat": "Writes", - "range": true - }, - { - "datasource": { - "uid": "$datasource" - }, - "exemplar": false, - "expr": "(\n sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"5.*\"}[$__rate_interval]))\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", 
route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", - "instant": false, - "legendFormat": "Reads", - "range": true - }, - { - "datasource": { - "uid": "$datasource" - }, - "exemplar": false, - "expr": "(\n (\n sum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n +\n # Consider missed evaluations as failures.\n sum(rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n )\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "instant": false, - "legendFormat": "Rule evaluations", - "range": true - }, - { - "datasource": { - "uid": "$datasource" - }, - "exemplar": false, - "expr": "(\n # Failed notifications from ruler to Alertmanager (handling the case the ruler metrics are missing).\n ((sum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n) or vector(0))\n +\n # Failed notifications from Alertmanager to receivers (handling the case the alertmanager metrics are missing).\n ((sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n) or vector(0))\n)\n/\n(\n # Total notifications from ruler to Alertmanager (handling the case the ruler metrics are missing).\n ((sum(rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n) or vector(0))\n +\n # Total notifications from Alertmanager to receivers (handling the case the alertmanager metrics 
are missing).\n ((sum(cluster_job_integration:cortex_alertmanager_notifications_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n) or vector(0))\n)\n", - "instant": false, - "legendFormat": "Alerting notifications", - "range": true - }, - { - "datasource": { - "uid": "$datasource" - }, - "exemplar": false, - "expr": "sum(rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n/\nsum(rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n", - "instant": false, - "legendFormat": "Object storage", - "range": true - } - ], - "title": "Status", - "type": "state-timeline" - }, - { - "id": 3, - "options": { - "alertInstanceLabelFilter": "cluster=~\"$cluster\", namespace=~\"$namespace\"", - "alertName": "Mimir", - "dashboardAlerts": false, - "maxItems": 100, - "sortOrder": 3, - "stateFilter": { - "error": true, - "firing": true, - "noData": false, - "normal": false, - "pending": false - } - }, - "span": 3, - "title": "Firing alerts", - "type": "alertlist" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Mimir cluster health", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "content": "These panels show an overview on the write path. 
\nTo examine the write path in detail, see a specific dashboard:\n\n- Writes\n- Writes resources\n- Writes networking\n- Overview resources\n- Overview networking\n", - "datasource": null, - "description": "", - "id": 4, - "mode": "markdown", - "span": 3, - "title": "", - "transparent": true, - "type": "text" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Write requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th 
percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Write latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "cps" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_samples:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "samples / sec", - "legendLink": null - }, - { - "expr": 
"sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "exemplars / sec", - "legendLink": null - } - ], - "title": "Ingestion / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Writes", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "content": "These panels show an overview on the read path. \nTo examine the read path in detail, see a specific dashboard:\n\n- Reads\n- Reads resources\n- Reads networking\n- Overview resources\n- Overview networking\n- Queries\n- Compactor\n", - "datasource": null, - "description": "", - "id": 8, - "mode": "markdown", - "span": 3, - "title": "", - "transparent": true, - "type": "text" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - 
}, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Read requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 10, - 
"links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Read latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, 
- "seriesOverrides": [ - { - "alias": "instant queries", - "color": "#429D48" - }, - { - "alias": "range queries", - "color": "#F1C731" - }, - { - "alias": "\"label names\" queries", - "color": "#2A66CF" - }, - { - "alias": "\"label values\" queries", - "color": "#9E44C1" - }, - { - "alias": "series queries", - "color": "#FFAB57" - }, - { - "alias": "remote read queries", - "color": "#C79424" - }, - { - "alias": "metadata queries", - "color": "#84D586" - }, - { - "alias": "exemplar queries", - "color": "#A1C4FC" - }, - { - "alias": "\"active series\" queries", - "color": "#C788DE" - } - ], - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_query\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "instant queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_query_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "range queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_labels\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "\"label names\" queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_label_name_values\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "\"label values\" queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_series\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "series queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_read\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "remote read queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_metadata\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "metadata queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_query_exemplars\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "exemplar queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=\"prometheus_api_v1_cardinality_active_series\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "\"active series\" queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=\"prometheus_api_v1_cardinality_label_names\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "\"label name cardinality\" queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=\"prometheus_api_v1_cardinality_label_values\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "\"label value cardinality\" queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_.*\",route!~\".*(query|query_range|label.*|series|read|metadata|query_exemplars|cardinality_.*)\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "other", - "legendLink": null - } - ], - "title": "Queries / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Reads", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "content": "These panels show an overview on the recording and alerting rules evaluation.\nTo examine the rules evaluation and alerts notifications in detail, see a specific dashboard:\n\n- Ruler\n- Alertmanager\n- Alertmanager resources\n- Overview resources\n- Overview networking\n", - "datasource": null, - "description": "", - "id": 12, - "mode": "markdown", - "span": 3, - "title": "", - "transparent": true, - "type": "text" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - 
"id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "success", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "missed", - "legendLink": null - } - ], - "title": "Rule evaluations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum (rate(cortex_prometheus_rule_evaluation_duration_seconds_sum{cluster=~\"$cluster\", 
job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum (rate(cortex_prometheus_rule_evaluation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "average", - "legendLink": null - } - ], - "title": "Rule evaluations latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n -\nsum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "failed", - 
"legendLink": null - } - ], - "title": "Alerting notifications sent to Alertmanager / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Recording and alerting rules", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "content": "These panels show an overview on the long-term storage (object storage).\nTo examine the storage in detail, see a specific dashboard:\n\n- Object store\n- Compactor\n", - "datasource": null, - "description": "", - "id": 16, - "mode": "markdown", - "span": 3, - "title": "", - "transparent": true, - "type": "text" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n-\nsum(rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": 
"sum(rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "seriesOverrides": [ - { - "alias": "attributes", - "color": "#429D48" - }, - { - "alias": "delete", - "color": "#F1C731" - }, - { - "alias": "exists", - "color": "#2A66CF" - }, - { - "alias": "get", - "color": "#9E44C1" - }, - { - "alias": "get_range", - "color": "#FFAB57" - }, - { - "alias": "iter", - "color": "#C79424" - }, - { - "alias": "upload", - "color": "#84D586" - } - ], - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - 
"tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(max by(user) (max_over_time(cortex_bucket_blocks_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[15m])))", - "format": "time_series", - "legendFormat": "blocks", - "legendLink": null - } - ], - "title": "Total number of blocks in the storage", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Long-term storage (object storage)", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", 
- "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Overview", - "uid": "ffcd83628d7d4b5a03d1cafd159e6c9c", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-overview.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-queries.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, 
sum(rate(cortex_query_frontend_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_query_frontend_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_frontend_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Queue duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_query_frontend_retries_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, 
sum(rate(cortex_query_frontend_retries_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_query_frontend_retries_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_query_frontend_retries_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Retries", - "type": "timeseries", - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (cortex_query_frontend_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Queue length (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - 
"showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(user) (cortex_query_frontend_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}) > 0", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "Queue length (per user)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Queue duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Queue length (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": 
"A", - "mode": "none" - } - }, - "min": 0, - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(user) (cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}) > 0", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "Queue length (per user)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Intervals per query\nThe average number of split queries (partitioned by time) executed a single input query.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_frontend_split_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) / sum(rate(cortex_frontend_query_range_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", method=\"split_by_interval_and_results_cache\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": 
"splitting rate", - "legendLink": null - } - ], - "title": "Intervals per query", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "# Query the new metric introduced in Mimir 2.10.\n(\n sum by(request_type) (rate(cortex_frontend_query_result_cache_hits_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))\n /\n sum by(request_type) (rate(cortex_frontend_query_result_cache_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))\n)\n# Otherwise fallback to the previous general-purpose metrics.\nor\n(\n label_replace(\n # Query metrics before and after dskit cache refactor.\n sum (\n rate(thanos_cache_memcached_hits_total{name=\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_hits_total{name=\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n )\n /\n sum (\n rate(thanos_cache_memcached_requests_total{name=~\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_requests_total{name=~\"frontend-cache\", cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n ),\n \"request_type\", \"query_range\", \"\", \"\")\n)\n", - "format": "time_series", - "legendFormat": "{{request_type}}", - "legendLink": null - } - ], - "title": "Query results cache hit ratio", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Query results cache skipped\nThe % of queries whose results could not be cached.\nIt is tracked for each split query when the splitting by interval is enabled.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_frontend_query_result_cache_skipped_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (reason) /\nignoring (reason) group_left sum(rate(cortex_frontend_query_result_cache_attempted_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "{{reason}}", - "legendLink": null - } - ], - "title": "Query results cache skipped", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend - query splitting and results cache", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Sharded queries ratio\nThe % of queries that 
have been successfully rewritten and executed in a shardable way.\nThis panel only takes into account the type of queries that are supported by query sharding (eg. range queries).\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_frontend_query_sharding_rewrites_succeeded_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) /\nsum(rate(cortex_frontend_query_sharding_rewrites_attempted_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "sharded queries ratio", - "legendLink": null - } - ], - "title": "Sharded queries ratio", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Number of sharded queries per query\nThe number of sharded queries that have been executed for a single input query. 
It only tracks queries that\nhave been successfully rewritten in a shardable way.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_frontend_sharded_queries_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_frontend_sharded_queries_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_frontend_sharded_queries_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_frontend_sharded_queries_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Number of sharded queries per query", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": 
false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend - query sharding", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:cortex_ingester_queried_series_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job:cortex_ingester_queried_series_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1 * sum(cluster_job:cortex_ingester_queried_series_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}) / sum(cluster_job:cortex_ingester_queried_series_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Series per query", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - 
"fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:cortex_ingester_queried_samples_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job:cortex_ingester_queried_samples_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1 * sum(cluster_job:cortex_ingester_queried_samples_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}) / sum(cluster_job:cortex_ingester_queried_samples_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Samples per query", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "nullPointMode": "null as zero", - "options": 
{ - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:cortex_ingester_queried_exemplars_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job:cortex_ingester_queried_exemplars_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1 * sum(cluster_job:cortex_ingester_queried_exemplars_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}) / sum(cluster_job:cortex_ingester_queried_exemplars_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Exemplars per query", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - 
"expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_querier_storegateway_instances_hit_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_instances_hit_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Number of store-gateways hit per query", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": 
"histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_querier_storegateway_refetches_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_refetches_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Refetches of missing blocks per query", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Consistency checks failed\nRate of queries that had to run with consistency checks and those checks failed. 
A failed consistency check means that some of at least one block which had to be queried wasn't present in any of the store-gateways.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Failure Rate" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_querier_blocks_consistency_checks_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) / sum(rate(cortex_querier_blocks_consistency_checks_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Failure Rate", - "legendLink": null - } - ], - "title": "Consistency checks failed", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Rejected queries\nThe proportion of all queries received by queriers that were rejected for some reason.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - 
"tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_querier_queries_rejected_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) / ignoring (reason) group_left sum(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_query(_range)?\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{reason}}", - "legendLink": null - } - ], - "title": "Rejected queries", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max(cortex_bucket_index_loaded{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"})", - "format": "time_series", - "legendFormat": "Max", - "legendLink": null - }, - { - "expr": "min(cortex_bucket_index_loaded{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"})", - "format": "time_series", - "legendFormat": "Min", - "legendLink": null - }, - { - "expr": "avg(cortex_bucket_index_loaded{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"})", - "format": 
"time_series", - "legendFormat": "Average", - "legendLink": null - } - ], - "title": "Bucket indexes loaded (per querier)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_bucket_index_loads_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) - sum(rate(cortex_bucket_index_load_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_bucket_index_load_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Bucket indexes load / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - 
"spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_bucket_index_load_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_bucket_index_load_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_bucket_index_load_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_bucket_index_load_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Bucket indexes load latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": 
{ - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_bucket_store_series_blocks_queried_sum{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "blocks", - "legendLink": null - } - ], - "title": "Blocks queried / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "binBps" - }, - "overrides": [ ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(data_type) (\n # Exclude \"chunks refetched\".\n rate(cortex_bucket_store_series_data_size_fetched_bytes_sum{component=\"store-gateway\", stage!=\"refetched\", cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "{{data_type}}", - "legendLink": null - } - ], - "title": "Data fetched / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 
100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "binBps" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(data_type) (\n # Exclude \"chunks processed\" to only count \"chunks returned\", other than postings and series.\n rate(cortex_bucket_store_series_data_size_touched_bytes_sum{component=\"store-gateway\", stage!=\"processed\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "{{data_type}}", - "legendLink": null - } - ], - "title": "Data touched / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Store-gateway", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 26, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(stage) (rate(cortex_bucket_store_series_request_stage_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum by(stage) 
(rate(cortex_bucket_store_series_request_stage_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "{{stage}}", - "legendLink": null - } - ], - "title": "Series request average latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 27, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by(stage, le) (rate(cortex_bucket_store_series_request_stage_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])))\n", - "format": "time_series", - "legendFormat": "{{stage}}", - "legendLink": null - } - ], - "title": "Series request 99th percentile latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Series batch preloading efficiency\nThis panel shows the % of time reduced by preloading, for Series() requests which have been\nsplit to 2+ batches. 
If a Series() request is served within a single batch, then preloading\nis not triggered, and thus not counted in this measurement.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 28, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "# Clamping min to 0 because if preloading not useful at all, then the actual value we get is\n# slightly negative because of the small overhead introduced by preloading.\nclamp_min(1 - (\n sum(rate(cortex_bucket_store_series_batch_preloading_wait_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\n sum(rate(cortex_bucket_store_series_batch_preloading_load_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n), 0)\n", - "format": "time_series", - "legendFormat": "% of time reduced by preloading", - "legendLink": null - } - ], - "title": "Series batch preloading efficiency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Blocks currently owned\nThis panel shows the number of blocks owned by each store-gateway replica.\nFor each owned block, the store-gateway keeps its index-header on disk, and\neventually loaded in memory (if index-header lazy loading is disabled, or lazy loading\nis 
enabled and the index-header was loaded).\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 29, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "cortex_bucket_store_blocks_loaded{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Blocks currently owned", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 30, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_bucket_store_block_loads_total{component=\"store-gateway\",cluster=~\"$cluster\", 
job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) - sum(rate(cortex_bucket_store_block_load_failures_total{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_bucket_store_block_load_failures_total{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Blocks loaded / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 31, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_bucket_store_block_drops_total{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) - sum(rate(cortex_bucket_store_block_drop_failures_total{component=\"store-gateway\",cluster=~\"$cluster\", 
job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_bucket_store_block_drop_failures_total{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Blocks dropped / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 32, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "cortex_bucket_store_indexheader_lazy_load_total{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"} - cortex_bucket_store_indexheader_lazy_unload_total{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Lazy loaded index-headers", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - 
"mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 33, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Index-header lazy load duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Index-header lazy load gate latency\nTime spent waiting for a turn to load an index header. 
This time is not included in \"Index-header lazy load duration.\"\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 34, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_bucket_stores_gate_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_bucket_stores_gate_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_bucket_stores_gate_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_bucket_stores_gate_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Index-header lazy load gate latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": 
"short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 35, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_bucket_store_series_hash_cache_hits_total{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum(rate(cortex_bucket_store_series_hash_cache_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "hit ratio", - "legendLink": null - } - ], - "title": "Series hash cache hit ratio", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 36, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": 
"sum(rate(thanos_store_index_cache_hits_total{item_type=\"ExpandedPostings\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum(rate(thanos_store_index_cache_requests_total{item_type=\"ExpandedPostings\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "hit ratio", - "legendLink": null - } - ], - "title": "ExpandedPostings cache hit ratio", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 37, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_cache_memory_hits_total{name=\"chunks-attributes-cache\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum(rate(cortex_cache_memory_requests_total{name=\"chunks-attributes-cache\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "hit ratio", - "legendLink": null - } - ], - "title": "Chunks attributes in-memory cache hit ratio", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": 
"default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Queries", - "uid": "b3abe8d5c040395cc36615cb4334c92d", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-queries.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-reads-networking.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - 
"links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": 
true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Summary", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": 
"{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"})", - "format": "time_series", - "legendFormat": "highest", - 
"legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": 
"Bps" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - 
"span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler", - 
"titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - 
"fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": 
"min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) 
(rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - 
"expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Store-gateway", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": 
"never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": 
"never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ruler", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - 
"allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Reads networking", - "uid": "54b2a0a4748b3bd1aefa92ce5559a1c2", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-reads-networking.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-reads-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - 
"steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": 
"multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Summary", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - 
"legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - 
"legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - 
"matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - 
"overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - 
"showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - 
"legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - 
"expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - 
"showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": 
"min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (RSS)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"ingester\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Rules", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", 
- "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ruler", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - 
"unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - 
"showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - 
{ - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Store-gateway", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - 
"unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (RSS)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - 
}, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"store-gateway\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 26, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"store-gateway\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk reads", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { 
- "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 27, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(store-gateway).*\"\n }\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": 
false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Reads resources", - "uid": "cc86fd5aa9301c6528986572ad974db9", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-reads-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-reads.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "175px", - "panels": [ - { - "content": "

\n This dashboard shows health metrics for the read path.\n It is broken into sections for each service on the read path, and organized by the order in which the read request flows.\n
\n Incoming queries travel from the gateway → query frontend → query scheduler → querier → ingester and/or store-gateway (depending on the time range of the query).\n
\n For each service, there are 3 panels showing (1) requests per second to that service, (2) average, median, and p99 latency of requests to that service, and (3) p99 latency of requests to each instance of that service.\n

\n

\n The dashboard also shows metrics for the 4 optional caches that can be deployed:\n the query results cache, the metadata cache, the chunks cache, and the index cache.\n
\n These panels will show “no data” if the caches are not deployed.\n

\n

\n Lastly, it also includes metrics for how the ingester and store-gateway interact with object storage.\n

\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 12, - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Reads dashboard description", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "100px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Instant queries per second\nRate of instant queries per second being made to the system.\nIncludes both queries made to the /prometheus API as\nwell as queries from the ruler.\n\n", - "fill": 1, - "format": "reqps", - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(\n rate(\n cortex_request_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",\n route=~\"(prometheus|api_prom)_api_v1_query\"\n }[$__rate_interval]\n )\n or\n rate(\n cortex_prometheus_rule_evaluations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Instant queries / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - 
"label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Range queries per second\nRate of range queries per second being made to\nMimir via the /prometheus API.\n\n", - "fill": 1, - "format": "reqps", - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_query_range\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Range queries / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### \"Label names\" queries per second\nRate of \"label names\" endpoint queries per second being made to\nMimir 
via the /prometheus API.\n\n", - "fill": 1, - "format": "reqps", - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_labels\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Label names queries / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### \"Label values\" queries per second\nRate of specific \"label values\" endpoint queries per second being made to\nMimir via the /prometheus API.\n\n", - "fill": 1, - "format": "reqps", - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - 
"seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_label_name_values\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Label values queries / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Series queries per second\nRate of series queries per second being made to\nMimir via the /prometheus API.\n\n", - "fill": 1, - "format": "reqps", - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_series\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": 
"70,80", - "timeFrom": null, - "timeShift": null, - "title": "Series queries / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - 
"matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { 
- "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Requests / sec\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 10, - "links": [ ], - "options": { - 
"legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Latency (Time in Queue)\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (Time in Queue)", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Queue length\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "queries" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "sum(min_over_time(cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__interval]))", - "format": "time_series", - "legendFormat": "Queue length", - "legendLink": null - } - ], - "title": "Queue length", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### 99th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "99th Percentile: {{ additional_queue_dimensions }}", - "refId": "A" - } - ], - "title": "99th Percentile Latency by Queue Dimension", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### 50th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "50th Percentile: {{ additional_queue_dimensions }}", - "refId": "A" - } - ], - "title": "50th Percentile Latency by Queue Dimension", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Average Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (additional_queue_dimensions) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (additional_queue_dimensions), \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "Average: {{ additional_queue_dimensions }}", - "refId": "C" - } - ], - "title": "Average Latency by Queue Dimension", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler Latency (Time in Queue) Breakout by Additional Queue Dimensions", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - 
"tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum (\n rate(thanos_memcached_operations_total{name=\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{name=\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "Requests/s", - "legendLink": null - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Cache – query results", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": 
"absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_querier_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / 
sum(cluster_job_route:cortex_querier_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_querier_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 
* sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - }, - { - 
"collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": 
"fixed" - } - } - ] - } - ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",route=~\"/gatewaypb.StoreGateway/.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - 
"refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 26, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Store-gateway", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - 
"stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 27, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n 
label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 28, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval])) * 1e3 / 
sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Store-gateway – key-value store for store-gateways ring", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 29, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(operation) (\n # Backwards compatibility\n rate(\n thanos_memcached_operations_total{\n component=\"store-gateway\",\n name=\"index-cache\",\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n or ignoring(backend)\n rate(\n thanos_cache_operations_total{\n component=\"store-gateway\",\n name=\"index-cache\",\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": 
"Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 30, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - 
"intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (getmulti)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Hit ratio\nEven if you do not set up memcached for the blocks index cache, you will still see data in this panel because the store-gateway by default has an\nin-memory blocks index cache.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 31, - 
"links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(item_type) (\n rate(\n thanos_store_index_cache_hits_total{\n component=\"store-gateway\",\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n/\nsum by(item_type) (\n rate(\n thanos_store_index_cache_requests_total{\n component=\"store-gateway\",\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n", - "format": "time_series", - "legendFormat": "{{item_type}}", - "legendLink": null - } - ], - "title": "Hit ratio", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memcached – block index cache (store-gateway accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 32, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(operation) (\n # Backwards compatibility\n rate(thanos_memcached_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{\n cluster=~\"$cluster\", 
job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 33, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n 
cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (getmulti)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - 
}, - "overrides": [ ] - }, - "id": 34, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_cache_memcached_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n)\n/\nsum(\n # Backwards compatibility\n rate(thanos_cache_memcached_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "items", - "legendLink": null - } - ], - "title": "Hit ratio", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memcached – chunks cache (store-gateway accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 35, - "links": [ ], - "options": { - "legend": { - "showLegend": 
true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(operation) (\n # Backwards compatibility\n rate(thanos_memcached_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 36, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - 
"intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n)\n", - "format": "time_series", - 
"intervalFactor": 2, - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (getmulti)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 37, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_cache_memcached_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n/\nsum(\n # Backwards compatibility\n rate(thanos_cache_memcached_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "items", - "legendLink": null - } - ], - "title": "Hit ratio", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memcached – metadata cache (store-gateway accesses)", - "titleSize": "h6" - }, - { - 
"collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 38, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(operation) (\n # Backwards compatibility\n rate(thanos_memcached_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 39, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", 
job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n 
component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (getmulti)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 40, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_cache_memcached_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n/\nsum(\n # Backwards compatibility\n rate(thanos_cache_memcached_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - 
"legendFormat": "items", - "legendLink": null - } - ], - "title": "Hit ratio", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memcached – metadata cache (querier accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 41, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 42, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - 
], - "title": "Error rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 43, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - 
"label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 44, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": 
"short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Blocks object store (store-gateway accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 45, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - 
"legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Get", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 46, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval]))", - "format": 
"time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: GetRange", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 47, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval]))", - 
"format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Upload", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 48, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 49, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 50, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"querier\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Error rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 51, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 52, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Blocks object store (querier accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 53, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Get", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 54, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: GetRange", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 55, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Upload", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 56, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - 
"regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Reads", - "uid": "e327503188913dc38ad571c647eef643", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-reads.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-remote-ruler-reads-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": 
"dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": 
"custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"ruler-query-frontend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend (dedicated to ruler)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"} / 
container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler (dedicated to ruler)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": 
"request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": 
"desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier (dedicated to ruler)", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - 
}, - "timezone": "utc", - "title": "Mimir / Remote ruler reads resources", - "uid": "1940f6ef765a506a171faa2056c956c3", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-remote-ruler-reads-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-remote-ruler-reads.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "175px", - "panels": [ - { - "content": "

\n This dashboard shows health metrics for the ruler read path when remote operational mode is enabled.\n It is broken into sections for each service on the ruler read path, and organized by the order in which the read request flows.\n
\n For each service, there are three panels showing (1) requests per second to that service, (2) average, median, and p99 latency of requests to that service, and (3) p99 latency of requests to each instance of that service.\n

\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 12, - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Remote ruler reads dashboard description", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "100px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Evaluations per second\nRate of rule expressions evaluated per second.\n\n", - "fill": 1, - "format": "reqps", - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(\n rate(\n cortex_request_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\",\n route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"\n }[$__rate_interval]\n )\n)\n", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Evaluations / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": 
false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": 
"color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th 
percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend (dedicated to ruler)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Requests / sec\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 6, - "links": [ ], - "options": { - 
"legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Latency (Time in Queue)\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (Time in Queue)", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Queue length\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "queries" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "sum(min_over_time(cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__interval]))", - "format": "time_series", - "legendFormat": "Queue length", - "legendLink": null - } - ], - "title": "Queue length", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler (dedicated to ruler)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### 99th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "99th Percentile: {{ additional_queue_dimensions }}", - "refId": "A" - } - ], - "title": "99th Percentile Latency by Queue Dimension", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### 50th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "50th Percentile: {{ additional_queue_dimensions }}", - "refId": "A" - } - ], - "title": "50th Percentile Latency by Queue Dimension", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Average Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (additional_queue_dimensions) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (additional_queue_dimensions), \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "Average: {{ additional_queue_dimensions }}", - "refId": "C" - } - ], - "title": "Average Latency by Queue Dimension", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler Latency (Time in Queue) Breakout by Additional Queue Dimensions", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - 
"value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", 
\"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_querier_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / sum(cluster_job_route:cortex_querier_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - 
"fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_querier_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier (dedicated to ruler)", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - 
"datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Remote ruler reads", - "uid": "f103238f7f5ab2f1345ce650cbfbfe2f", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-remote-ruler-reads.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-rollout-progress.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "fillOpacity": 80, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineWidth": 1, - "scaleDistribution": { - "type": "linear" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": 
[ ], - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Ready" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Updated" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "blue", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 13, - "w": 10, - "x": 0, - "y": 0 - }, - "id": 1, - "links": [ ], - "options": { - "barRadius": 0, - "barWidth": 0.96999999999999997, - "fullHighlight": false, - "groupWidth": 0.69999999999999996, - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "orientation": "horizontal", - "showValue": "auto", - "stacking": "none", - "tooltip": { - "mode": "multi", - "sort": "none" - }, - "xField": "Workload", - "xTickLabelRotation": 0, - "xTickLabelSpacing": 0 - }, - "targets": [ - { - "expr": "(\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas_updated{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas_updated{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n /\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n) and (\n sum by (workload) (\n label_replace(label_replace(label_replace(\n 
kube_deployment_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n > 0\n)\n", - "format": "table", - "instant": true, - "intervalFactor": null, - "legendFormat": "__auto", - "legendLink": null, - "step": null - }, - { - "expr": "(\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas_ready{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas_ready{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n /\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n) and (\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n > 0\n)\n", - "format": "table", - "instant": true, - "intervalFactor": null, - "legendFormat": "__auto", - "legendLink": null, - "step": null - } - ], - "title": "Rollout progress", - "transformations": [ - { - "id": "joinByField", 
- "options": { - "byField": "workload", - "mode": "outer" - } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time 1": true, - "Time 2": true - }, - "renameByName": { - "Value #A": "Updated", - "Value #B": "Ready", - "workload": "Workload" - } - } - }, - { - "id": "sortBy", - "options": { - "sort": [ - { - "field": "Workload" - } - ] - } - } - ], - "type": "barchart" - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 10, - "y": 0 - }, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Writes - 2xx", - "tooltip": { - 
"shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 0.20000000000000001 - }, - { - "color": "red", - "value": 0.5 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 12, - "y": 0 - }, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - 
"step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Writes - 4xx", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 0.01 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 14, - "y": 0 - }, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - 
"intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Writes - 5xx", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 0.20000000000000001 - }, - { - "color": "red", - "value": 0.5 - } - ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 8, - "x": 16, - "y": 0 - }, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - 
"step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Writes 99th latency", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 10, - "y": 4 - }, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": 
"", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Reads - 2xx", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 0.01 - }, - { - "color": "red", - "value": 0.050000000000000003 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 12, - "y": 4 - }, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", 
route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Reads - 4xx", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 0.01 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 14, - "y": 4 - }, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Reads - 5xx", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 1 - }, - { - "color": "red", - "value": 2.5 - } - ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 8, - "x": 16, - "y": 4 - }, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", 
route=~\"(prometheus|api_prom)_api_v1_.+\"}))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Reads 99th latency", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 0, - "noValue": "All healthy", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 1 - }, - { - "color": "red", - "value": 2 - } - ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 3, - "w": 10, - "x": 0, - "y": 13 - }, - "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "text": { - "titleSize": 14, - "valueSize": 14 - }, - "textMode": "value_and_name" - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "kube_deployment_status_replicas_unavailable{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n> 0\n", - "format": null, - "instant": true, - "interval": "", - 
"intervalFactor": null, - "legendFormat": "{{deployment}}", - "legendLink": null, - "step": null - }, - { - "expr": "kube_statefulset_status_replicas_current{cluster=~\"$cluster\", namespace=~\"$namespace\"} -\nkube_statefulset_status_replicas_ready {cluster=~\"$cluster\", namespace=~\"$namespace\"}\n> 0\n", - "format": null, - "instant": true, - "interval": "", - "intervalFactor": null, - "legendFormat": "{{statefulset}}", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Unhealthy pods", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "r.*" - }, - "properties": [ - { - "id": "custom.align", - "value": "center" - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 6, - "x": 10, - "y": 8 - }, - "id": 11, - "targets": [ - { - "expr": "count by(container, version) (\n label_replace(\n kube_pod_container_info{cluster=~\"$cluster\", namespace=~\"$namespace\"},\n \"version\", \"$1\", \"image\", \".*:(.*)\"\n )\n)\n", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "title": "Pods count per version", - "transformations": [ - { - "id": "labelsToFields", - "options": { - "valueLabel": "version" - } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true - }, - "indexByName": { - "Time": 0, - "container": 1 - } - } - }, - { - "id": "sortBy", - "options": { - "fields": { }, - "sort": [ - { - "field": "container" - } - ] - } - } - ], - "type": "table" - }, - { - 
"datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 10 - }, - "unit": "percentunit" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 8 - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}))[1h:])\n)\n", - "format": "time_series", - "legendFormat": "writes", - "legendLink": null - }, - { - "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}))[1h:])\n)\n", - "format": "time_series", - "legendFormat": "reads", - "legendLink": null - } - ], - "title": "Latency vs 24h ago", - "type": "timeseries" - } - ], - "refresh": "10s", - "rows": null, - "schemaVersion": 27, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - 
"options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Rollout progress", - "uid": "7f0b5567d543a1698e695b530eb7f5de", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-rollout-progress.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-ruler.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ 
- "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "100px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cortex_ruler_managers_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Active configurations", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - 
"seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Total rules", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Read from ingesters - QPS\nNote: Even while operating in Remote ruler mode you will still see values for this panel.\n\nThis is because the metrics are inclusive of intermediate services and are showing the requests that ultimately reach the ingesters.\n\nFor a more detailed view of the read path when using remote ruler mode, see the Remote ruler reads dashboard.\n\n", - "fill": 1, - "format": "reqps", - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Read from ingesters - QPS", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "reqps", - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Write to ingesters - QPS", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - 
"label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "success", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - }, - 
{ - "expr": "sum(rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "missed", - "legendLink": null - } - ], - "title": "Evaluations per second", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum (rate(cortex_prometheus_rule_evaluation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum (rate(cortex_prometheus_rule_evaluation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "average", - "legendLink": null - } - ], - "title": "Latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Rule evaluations global", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - 
"matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", 
operation=\"/cortex.Ingester/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "QPS", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", 
operation=\"/cortex.Ingester/Push\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Writes (ingesters)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { 
- "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "QPS", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, 
sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Reads (ingesters)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - 
"mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n 
label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ruler - key-value store for rulers ring", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(cortex_querier_storegateway_instances_hit_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_instances_hit_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Number of store-gateways hit per query", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(cortex_querier_storegateway_refetches_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_refetches_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Refetches of missing blocks per query", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Consistency checks failed\nRate of queries that had to run with consistency checks and those checks failed. A failed consistency check means that some of at least one block which had to be queried wasn't present in any of the store-gateways.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Failures / sec" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_querier_blocks_consistency_checks_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) / 
sum(rate(cortex_querier_blocks_consistency_checks_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Failures / sec", - "legendLink": null - } - ], - "title": "Consistency checks failed", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ruler - blocks storage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(user) (rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum by(user) (rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]) > 0)\n> 0\n", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Delivery errors", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": 
"percentunit" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(user) (rate(cortex_prometheus_notifications_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum by(user) (rate(cortex_prometheus_notifications_queue_capacity{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0\n", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Queue length", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (user) (increase(cortex_prometheus_notifications_dropped_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0\n", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Dropped", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Notifications", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - 
"pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(user) (rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Missed iterations", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "rate(cortex_prometheus_rule_group_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])\n /\nrate(cortex_prometheus_rule_group_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])\n", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": 
false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(rule_group) (rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "{{ rule_group }}", - "legendLink": null - } - ], - "title": "Failures", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Group evaluations", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "sum by(user) (rate(cortex_prometheus_rule_evaluation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum by(user) (rate(cortex_prometheus_rule_evaluation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Latency", - "type": "timeseries" - } - ], - 
"repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Rule evaluation per user", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Error rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - 
"custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - 
"fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 26, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - 
"repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ruler configuration object store (ruler accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 27, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Get", - "type": "timeseries", - "yaxes": [ - { - 
"format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 28, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: GetRange", - "type": 
"timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 29, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Upload", - 
"type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 30, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: 
Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Ruler", - 
"uid": "631e15d5d85afb2ca8e35d62984eeaa0", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-ruler.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-scaling.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "200px", - "panels": [ - { - "id": 1, - "options": { - "content": "This dashboard identifies scaling-related issues by suggesting services that you might want to scale up.\nThe table that follows contains a suggested number of replicas and the reason why.\nIf the system is failing and depending on the reason, try scaling up to the specified number.\nThe specified numbers are intended as helpful guidelines when things go wrong, rather than prescriptive guidelines.\n\nReasons:\n- **sample_rate**: There are not enough replicas to handle the\n sample rate. Applies to distributor and ingesters.\n- **active_series**: There are not enough replicas\n to handle the number of active series. Applies to ingesters.\n- **cpu_usage**: There are not enough replicas\n based on the CPU usage of the jobs vs the resource requests.\n Applies to all jobs.\n- **memory_usage**: There are not enough replicas based on the memory\n usage vs the resource requests. 
Applies to all jobs.\n- **active_series_limits**: There are not enough replicas to hold 60% of the\n sum of all the per tenant series limits.\n- **sample_rate_limits**: There are not enough replicas to handle 60% of the\n sum of all the per tenant rate limits.\n", - "mode": "markdown" - }, - "span": 12, - "title": "", - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Service scaling", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "400px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 0, - "desc": false - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "Required Replicas", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "Cluster", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "__name__", - "thresholds": [ ], - "type": "hidden", - "unit": "short" - }, - { - "alias": "Cluster", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - 
"link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "cluster", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "Service", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "deployment", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "Namespace", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "namespace", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "Reason", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "reason", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "sort_desc(\n cluster_namespace_deployment_reason:required_replicas:count{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n > ignoring(reason) group_left\n cluster_namespace_deployment:actual_replicas:count{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\n", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Workload-based scaling", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - 
"yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Scaling", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Scaling", - "uid": 
"64bbad83507b7289b514725658e10352", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-scaling.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-slow-queries.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | 
user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Response time", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Fetched series", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - 
"unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Fetched chunks", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": 
"quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Response size", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Time span", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "description": "### Query wall time\nSeconds per second spent by queriers evaluating queries.\nThis is roughly the product of the number of 
subqueries for a query and how long they took.\nIn increase in this metric means that queries take more resources from the query path to evaluate.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap query_wall_time_seconds [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap query_wall_time_seconds [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Query wall time", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Accross tenants", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, 
- "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 response time", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 fetched series", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, 
- "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 fetched chunks", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 response size", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - 
"pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 time span", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "description": "### Query wall time\nSeconds per second spent by queriers evaluating queries.\nThis is roughly the product of the number of subqueries for a query and how long they took.\nIn increase in this metric means that queries take more resources from the query path to evaluate.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | 
unwrap query_wall_time_seconds [$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 query wall time", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Top 10 tenants", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 response time", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, 
- "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 fetched series", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 fetched chunks", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { 
- "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 response size", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 time span", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "description": "### Query wall time\nSeconds per second spent by queriers evaluating queries.\nThis is roughly the product of the number of subqueries for a query and how long they took.\nIn increase in this metric means that queries take more resources from the query path to evaluate.\n\n", - "fieldConfig": { - 
"defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap query_wall_time_seconds [$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 query wall time", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Top 10 User-Agents", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "fetched_chunk_bytes" - }, - "properties": [ - { - "id": "unit", - "value": "bytes" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "fetched_index_bytes" - }, - "properties": [ - { - "id": "unit", - "value": "bytes" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "response_size_bytes" - }, - "properties": [ - { - "id": "unit", - "value": "bytes" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "results_cache_hit_bytes" - }, - "properties": [ - { - "id": "unit", - "value": "bytes" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "results_cache_miss_bytes" - }, - "properties": [ - { - "id": "unit", - "value": 
"bytes" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "estimated_series_count" - }, - "properties": [ - { - "id": "unit", - "value": "short" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "fetched_chunks_count" - }, - "properties": [ - { - "id": "unit", - "value": "short" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "fetched_series_count" - }, - "properties": [ - { - "id": "unit", - "value": "short" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Time span" - }, - "properties": [ - { - "id": "unit", - "value": "s" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Duration" - }, - "properties": [ - { - "id": "unit", - "value": "s" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Step" - }, - "properties": [ - { - "id": "unit", - "value": "s" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "queue_time_seconds" - }, - "properties": [ - { - "id": "unit", - "value": "s" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "query_wall_time_seconds" - }, - "properties": [ - { - "id": "unit", - "value": "s" - } - ] - } - ] - }, - "height": "500px", - "id": 19, - "span": 12, - "targets": [ - { - "expr": "{cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | label_format response_time_seconds=\"{{ if .response_time }} {{ duration .response_time }} {{ end }}\",param_step_seconds=\"{{ if .param_step }} {{ div .param_step 1000 }} {{ end }}\",length_seconds=\"{{ if .length }} {{ duration .length }} {{ end }}\"", - "instant": false, - "legendFormat": "", - "range": true, - "refId": "A" - } - ], - "title": "Slow queries", - "transformations": [ - { - "id": "extractFields", - "options": { - "source": "labels" - } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Line": true, - "Time": 
true, - "caller": true, - "cluster": true, - "component": true, - "container": true, - "gossip_ring_member": true, - "host": true, - "id": true, - "job": true, - "labels": true, - "length": true, - "level": true, - "line": true, - "method": true, - "msg": true, - "name": true, - "namespace": true, - "param_step": true, - "path": true, - "pod": true, - "pod_template_hash": true, - "response_time": true, - "stream": true, - "traceID": true, - "tsNs": true - }, - "indexByName": { - "err": 10, - "length_seconds": 3, - "param_end": 5, - "param_query": 8, - "param_start": 4, - "param_step_seconds": 7, - "param_time": 6, - "response_time_seconds": 9, - "status": 1, - "ts": 0, - "user": 2 - }, - "renameByName": { - "err": "Error", - "length_seconds": "Time span", - "param_end": "End", - "param_query": "Query", - "param_start": "Start", - "param_step_seconds": "Step", - "param_time": "Time (instant query)", - "response_time_seconds": "Duration", - "ts": "Completion date", - "user": "Tenant ID" - } - } - }, - { - "id": "convertFieldType", - "options": { - "conversions": [ - { - "destinationType": "number", - "targetField": "sharded_queries" - }, - { - "destinationType": "number", - "targetField": "split_queries" - }, - { - "destinationType": "number", - "targetField": "fetched_chunk_bytes" - }, - { - "destinationType": "number", - "targetField": "fetched_index_bytes" - }, - { - "destinationType": "number", - "targetField": "response_size_bytes" - }, - { - "destinationType": "number", - "targetField": "results_cache_hit_bytes" - }, - { - "destinationType": "number", - "targetField": "results_cache_miss_bytes" - }, - { - "destinationType": "number", - "targetField": "estimated_series_count" - }, - { - "destinationType": "number", - "targetField": "fetched_chunks_count" - }, - { - "destinationType": "number", - "targetField": "fetched_series_count" - }, - { - "destinationType": "number", - "targetField": "Time span" - }, - { - "destinationType": "number", - "targetField": 
"Duration" - }, - { - "destinationType": "number", - "targetField": "Step" - }, - { - "destinationType": "number", - "targetField": "queue_time_seconds" - }, - { - "destinationType": "number", - "targetField": "query_wall_time_seconds" - } - ] - } - } - ], - "type": "table" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "hide": 0, - "includeAll": false, - "label": "Loki data source", - "multi": false, - "name": "loki_datasource", - "query": "loki", - "type": "datasource" - }, - { - "current": { - "selected": true, - "text": "5s", - "value": "5s" - }, - "hide": 0, - "label": "Min duration", - "name": "min_duration", - "options": [ - { - "selected": true, - "text": "5s", - "value": 
"5s" - } - ], - "query": "5s", - "type": "textbox" - }, - { - "current": { - "selected": true, - "text": ".*", - "value": ".*" - }, - "hide": 0, - "label": "Tenant ID", - "name": "tenant_id", - "options": [ - { - "selected": true, - "text": ".*", - "value": ".*" - } - ], - "query": ".*", - "type": "textbox" - }, - { - "current": { - "selected": true, - "text": ".*", - "value": ".*" - }, - "hide": 0, - "label": "User-Agent HTTP Header", - "name": "user_agent", - "options": [ - { - "selected": true, - "text": ".*", - "value": ".*" - } - ], - "query": ".*", - "type": "textbox" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Slow queries", - "uid": "6089e1ce1e678788f46312a0a1e647e6", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-slow-queries.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-tenants.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "25px", - "panels": [ - { - "content": "

\n This dashboard shows various metrics detailed by tenant (user) selected above.\n

\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 12, - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Tenants dashboard description", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### All series\nNumber of active, in-memory, and owned series per user, and active series matching custom trackers (in parenthesis).\nNote that these counts include all series regardless of the type of data (counter, gauge, native histogram, etc.).\nNote that active series matching custom trackers are included in the total active series count.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(\n (\n cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n - cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "in-memory", - "legendLink": null - }, - { - "expr": "max(cortex_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"max_global_series_per_user\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"max_global_series_per_user\"})\n", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "sum(\n cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "active", - "legendLink": null - }, - { - "expr": "sum(\n cortex_ingester_owned_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "owned", - "legendLink": null - }, - { - "expr": "sum by (name) (\n cortex_ingester_active_series_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n) > 0\n", - "format": "time_series", - "legendFormat": "active ({{ name }})", - "legendLink": null - } - ], - "title": "All series", - 
"type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### In-memory series per ingester\nLocal tenant series limit and number of in-memory series per ingester.\nBecause series can be unevenly distributed across ingesters, ingesters may hit the local limit at different times.\nNote that in-memory series may exceed the local limit if limiting based on owned series is enabled.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/local limit .+/" - }, - "properties": [ - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - }, - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "min by (job) (cortex_ingester_local_limits{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", limit=\"max_global_series_per_user\", user=\"$user\"})\n", - "format": "time_series", - "legendFormat": "local limit ({{job}})", - "legendLink": null - }, - { - "expr": "cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n- cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "In-memory series per ingester", - "type": "timeseries" - }, - { - "datasource": 
"$datasource", - "description": "### Owned series per ingester\nLocal tenant series limit and number of owned series per ingester.\nBecause series can be unevenly distributed across ingesters, ingesters may hit the local limit at different times.\nOwned series are the subset of an ingester's in-memory series that currently map to it in the ring\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/local limit .+/" - }, - "properties": [ - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - }, - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "min by (job) (cortex_ingester_local_limits{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", limit=\"max_global_series_per_user\", user=\"$user\"})\n", - "format": "time_series", - "legendFormat": "local limit ({{job}})", - "legendLink": null - }, - { - "expr": "cortex_ingester_owned_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Owned series per ingester", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Tenant series counts", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - 
"description": "### Series with exemplars\nNumber of series with exemplars currently in storage.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "series", - "legendLink": null - } - ], - "title": "Series with exemplars", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Oldest exemplar age\nThe age of the oldest exemplar stored in circular storage.\nUseful to check for what time range the current exemplar buffer limit allows.\nThis usually means the max age for all exemplars for a typical setup.\nThis is not true though if one of the series timestamp is in future compared to rest series.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - 
"tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "time() - min(cortex_ingester_tsdb_exemplar_last_exemplars_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"} > 0)", - "format": "time_series", - "legendFormat": "age", - "legendLink": null - } - ], - "title": "Oldest exemplar age", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Native histogram series\nNumber of active native histogram series per user, and active native histogram series matching custom trackers (in parenthesis).\nNote that active series matching custom trackers are included in the total active series count.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n cortex_ingester_active_native_histogram_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "active", - "legendLink": null - }, - { - "expr": "sum by (name) (\n 
cortex_ingester_active_native_histogram_series_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n) > 0\n", - "format": "time_series", - "legendFormat": "active ({{ name }})", - "legendLink": null - } - ], - "title": "Native histogram series", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Total number of buckets used by native histogram series\nTotal number of buckets in active native histogram series per user, and total active native histogram buckets matching custom trackers (in parenthesis).\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n cortex_ingester_active_native_histogram_buckets{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "buckets", - 
"legendLink": null - }, - { - "expr": "sum by (name) (\n cortex_ingester_active_native_histogram_buckets_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n) > 0\n", - "format": "time_series", - "legendFormat": "buckets ({{ name }})", - "legendLink": null - } - ], - "title": "Total number of buckets used by native histogram series", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Exemplars and native histograms", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Distributor requests incoming rate\nThe rate of requests that have come in to the distributor, including rejected requests.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_requests_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Distributor requests incoming rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor requests 
received (accepted) rate\nThe rate of received requests, excluding rejected requests.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_received_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - }, - { - "expr": "max(cortex_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"request_rate\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"request_rate\"})\n", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "Distributor requests received (accepted) rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Newest seen sample age\nThe age of the newest received sample seen in the distributors.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - 
"thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "time() - max(cortex_distributor_latest_seen_sample_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"} > 0)", - "format": "time_series", - "legendFormat": "age", - "legendLink": null - } - ], - "title": "Newest seen sample age", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor discarded requests rate\nThe rate of each request's discarding reason.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_discarded_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{ reason }}", - "legendLink": null - } - ], - "title": "Distributor discarded requests rate", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor ingestion requests", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Distributor samples incoming rate\nThe rate of samples that 
have come in to the distributor, including rejected or deduped exemplars.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_samples_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Distributor samples incoming rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor samples received (accepted) rate\nThe rate of received samples, excluding rejected and deduped samples.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - }, - { - "expr": "max(cortex_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"ingestion_rate\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"ingestion_rate\"})\n", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "Distributor samples received (accepted) rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor deduplicated/non-HA\nThe rate of deduplicated samples and the rate of received samples for a user that has HA tracking turned on, but the sample didn't contain both HA labels.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_deduped_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "deduplicated", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_distributor_non_ha_samples_received_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "non-HA", - 
"legendLink": null - } - ], - "title": "Distributor deduplicated/non-HA", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor and ingester discarded samples rate\nThe rate of each sample's discarding reason.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{ reason }} (distributor)", - "legendLink": null - }, - { - "expr": "sum by (reason) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{ reason }} (ingester)", - "legendLink": null - } - ], - "title": "Distributor and ingester discarded samples rate", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Samples ingestion funnel", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Distributor exemplars incoming rate\nThe rate of exemplars that have come in to the distributor, including rejected or deduped exemplars.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - 
"pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_exemplars_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Distributor exemplars incoming rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor exemplars received (accepted) rate\nThe rate of received exemplars, excluding rejected and deduped exemplars.\nThis number can be sensibly lower than incoming rate because we dedupe the HA sent exemplars, and then reject based on time.\nSee discarded rate for reasons why exemplars are being discarded.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_received_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Distributor exemplars received 
(accepted) rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor discarded exemplars rate\nThe rate of each exmplars' discarding reason.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_discarded_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{ reason }}", - "legendLink": null - } - ], - "title": "Distributor discarded exemplars rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Ingester appended exemplars rate\nTotal number of exemplars appended in the ingesters.\nThis can be lower than ingested exemplars rate since TSDB does not append the same exemplar twice, and those can be frequent.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n rate(cortex_ingester_tsdb_exemplar_exemplars_appended_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval])\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Ingester appended exemplars rate", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Exemplars ingestion funnel", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Symbol table size for loaded blocks\nSize of symbol table in memory for loaded blocks, averaged by ingester.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (job) (cortex_ingester_tsdb_symbol_table_size_bytes{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "{{ job }}", - "legendLink": null - } - ], - "title": "Symbol table size for loaded blocks", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Space used by local blocks\nThe number of bytes that are currently used for local storage by all blocks.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 
1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (job) (cortex_ingester_tsdb_storage_blocks_bytes{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "{{ job }}", - "legendLink": null - } - ], - "title": "Space used by local blocks", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingesters' storage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Number of groups\nTotal number of rule groups for a tenant.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "count(sum by (rule_group) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", 
user=\"$user\"}))", - "format": "time_series", - "legendFormat": "groups", - "legendLink": null - }, - { - "expr": "max(cortex_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"ruler_max_rule_groups_per_tenant\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"ruler_max_rule_groups_per_tenant\"})\n", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "Number of groups", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Number of rules\nTotal number of rules for a tenant.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "rules", - "legendLink": null - } - ], - "title": "Number of rules", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "options": { - "legend": { - 
"showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Total evaluations rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 26, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (rule_group) (rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "{{ rule_group }}", - "legendLink": null - } - ], - "title": "Failed evaluations rate", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Rules", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 27, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - 
"points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "rules", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (rule_group) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit biggest groups", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 28, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - 
"nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "seconds", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (rule_group) (cortex_prometheus_rule_group_last_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit slowest groups (last evaluation)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Top rules", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": 
"$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 29, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Sent notifications rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "rate" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 30, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Failed notifications rate", - "type": "timeseries" - } - ], - "repeat": null, - 
"repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Notifications", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 31, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (user) (cortex_alertmanager_alerts{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "alerts", - "legendLink": null - }, - { - "expr": "sum by (user) (cortex_alertmanager_silences{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "silences", - "legendLink": null - } - ], - "title": "Alerts", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": 
"#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 32, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "(\nsum(rate(cortex_alertmanager_notifications_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))\n-\non() (sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) or on () vector(0))\n) > 0\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "NPS", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 33, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "(\nsum(rate(cortex_alertmanager_notifications_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration)\n-\n(sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", 
user=\"$user\"}[$__rate_interval])) by(integration) or\n (sum(rate(cortex_alertmanager_notifications_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration) * 0)\n)) > 0\n", - "format": "time_series", - "legendFormat": "success - {{ integration }}", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration)", - "format": "time_series", - "legendFormat": "failed - {{ integration }}", - "legendLink": null - } - ], - "title": "NPS by integration", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alertmanager", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 34, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_query_frontend_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Queries / Sec", - "legendLink": null - } - ], - "title": "Rate of Read Requests - query-frontend", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - 
"fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 35, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "Queue Length", - "legendLink": null - } - ], - "title": "Number of Queries Queued - query-scheduler", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Read Path - Queries (User)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 36, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_query_frontend_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Queries / Sec", - "legendLink": null - } - ], - "title": "Rate of Read Requests - ruler-query-frontend", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - 
"drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 37, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "Queue Length", - "legendLink": null - } - ], - "title": "Number of Queries Queued - ruler-query-scheduler", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Read Path - Queries (Ruler)", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Estimated Compaction Jobs\nEstimated number of compaction jobs for selected user, based on latest version of bucket index. When user sends data, ingesters upload new user blocks every 2 hours\n(shortly after 01:00 UTC, 03:00 UTC, 05:00 UTC, etc.), and compactors should process all of the blocks within 2h interval.\nIf this graph regularly goes to zero (or close to zero) in 2 hour intervals, then compaction for this user works correctly.\n\nDepending on the configuration, there are two types of jobs: `split` jobs and `merge` jobs. 
Split jobs will only show up when user is configured with positive number of `compactor_split_and_merge_shards`.\nValues for split and merge jobs are stacked.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 50, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 38, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (type) (cortex_bucket_index_estimated_compaction_jobs{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", user=\"$user\"})\nand ignoring(type)\n(sum(rate(cortex_bucket_index_estimated_compaction_jobs_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) == 0)\n", - "format": "time_series", - "legendFormat": "{{ job }}", - "legendLink": null - } - ], - "title": "Estimated Compaction Jobs", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Number of blocks\nNumber of blocks stored in long-term storage for this user.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 39, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by (user) (cortex_bucket_blocks_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", user=\"$user\"})\n", - "format": "time_series", - "legendFormat": "{{ job }}", - "legendLink": null - } - ], - "title": "Blocks", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Compactions", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "user", - "multi": false, - "name": "user", - "options": [ ], - "query": "label_values(cortex_ingester_active_series{cluster=~\"$cluster\", namespace=~\"$namespace\"}, user)", - "refresh": 1, - "regex": "", - "sort": 
1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "current": { - "selected": true, - "text": "10", - "value": "10" - }, - "hide": 0, - "includeAll": false, - "multi": false, - "name": "limit", - "options": [ - { - "selected": true, - "text": "10", - "value": "10" - }, - { - "selected": false, - "text": "50", - "value": "50" - }, - { - "selected": false, - "text": "100", - "value": "100" - }, - { - "selected": false, - "text": "500", - "value": "500" - }, - { - "selected": false, - "text": "1000", - "value": "1000" - } - ], - "type": "custom" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Tenants", - "uid": "35fa247ce651ba189debf33d7ae41611", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-tenants.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-top-tenants.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "25px", - "panels": [ - { - "content": "

\n This dashboard shows the top tenants based on multiple selection criterias.\n Rows are collapsed by default to avoid querying all of them.\n Use the templating variable \"limit\" above to select the amount of users to be shown.\n

\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 12, - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Top tenants dashboard description", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "series", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit,\n sum by (user) (\n cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n )\n)\n", - 
"format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by active series", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By active series", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "series", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - 
"pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} )\n)\n)", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by in-memory series (series created - series removed)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By in-memory series", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - 
}, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} )\n)\n\nand\ntopk($limit, sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ end())\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ end())\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} @ end())\n)\n - sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ start())\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ start())\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} @ start())\n)\n)\n", - 
"format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Top $limit users by in-memory series (series created - series removed) that grew the most between query range start and query range end", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By in-memory series growth", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "samples/s", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[5m])))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - 
"thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by received samples rate in last 5m", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By samples rate", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\nand\ntopk($limit,\n sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ end()))\n -\n sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ start()))\n)\n", - "format": "time_series", - "legendFormat": "{{ user }}", 
- "legendLink": null - } - ], - "title": "Top $limit users by received samples rate that grew the most between query range start and query range end", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By samples rate growth", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "samples/s", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[5m])))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - 
"title": "Top $limit users by discarded samples rate in last 5m", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By discarded samples rate", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\nand\ntopk($limit,\n sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ end()))\n -\n sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ start()))\n)\n", - "format": "time_series", - 
"legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Top $limit users by discarded samples rate that grew the most between query range start and query range end", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By discarded samples rate growth", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "series", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit,\n sum by (user) (\n cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n )\n)\n", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by series with exemplars", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By series with exemplars", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "exemplars/s", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": 
"", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (user) (rate(cortex_distributor_received_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[5m])))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by received exemplars rate in last 5m", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By exemplars rate", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 11, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 3, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - 
{ - "alias": "rules", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (rule_group, user) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit biggest groups", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By rule group size", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 12, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": 
"flot", - "seriesOverrides": [ ], - "sort": { - "col": 3, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "seconds", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (rule_group, user) (cortex_prometheus_rule_group_last_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit slowest groups (last evaluation)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By rule group evaluation time", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - 
"fill": 1, - "id": 13, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "Compaction Jobs", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit,\n sum by (user) (cortex_bucket_index_estimated_compaction_jobs{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"})\n and ignoring(user)\n (sum(rate(cortex_bucket_index_estimated_compaction_jobs_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) == 0)\n)\n", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by estimated compaction jobs from bucket-index", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - 
"format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By estimated compaction jobs from bucket-index", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "current": { - "selected": true, - "text": "10", - "value": "10" - }, - "hide": 0, - "includeAll": false, - "multi": false, - "name": "limit", - "options": [ - { - "selected": true, - "text": "10", - "value": "10" - }, - { - "selected": false, - "text": "50", - "value": "50" - }, - { - "selected": false, - "text": "100", - "value": "100" - } - 
], - "type": "custom" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Top tenants", - "uid": "bc6e12d4fe540e4a1785b9d3ca0ffdd9", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-top-tenants.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-writes-networking.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - 
"expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Summary", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - 
"drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, 
- "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"})", - "format": "time_series", - "legendFormat": 
"limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": 
null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum 
by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", 
- "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Writes networking", - "uid": "978c1cb452585c96697a238eaac7fe2d", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-writes-networking.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-writes-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { 
- "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Summary", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - 
"fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", 
- "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - 
"spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (cortex_ingester_memory_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "In-memory series", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - 
"thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - 
"drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (RSS)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, 
- "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": 
"absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"ingester\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - 
"lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"ingester\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk reads", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(ingester).*\"\n 
}\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Writes resources", - "uid": "bc9160e50b52e89e0e49c840fea3d379", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - 
grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-writes-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-writes.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "125px", - "panels": [ - { - "content": "

\n This dashboard shows various health metrics for the write path.\n It is broken into sections for each service on the write path,\n and organized by the order in which the write request flows.\n
\n Incoming metrics data travels from the gateway → distributor → ingester.\n
\n For each service, there are 3 panels showing\n (1) requests per second to that service,\n (2) average, median, and p99 latency of requests to that service, and\n (3) p99 latency of requests to each instance of that service.\n

\n

\n It also includes metrics for the key-value (KV) stores used to manage\n the high-availability tracker and the ingesters.\n

\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 12, - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Writes dashboard description", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "100px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_samples:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Samples / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Exemplars / sec\nThe total number of received exemplars by the distributors, excluding rejected and deduped exemplars, 
but not necessarily ingested by the ingesters.\n\n", - "fill": 1, - "format": "short", - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Exemplars / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### In-memory series\nThe number of series not yet flushed to object storage that are held in ingester memory.\n\n", - "fill": 1, - "format": "short", - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - 
"steppedLine": false, - "targets": [ - { - "expr": "sum(cortex_ingester_memory_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n/ on(cluster, namespace) group_left\nmax by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}))\n", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "In-memory series", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Exemplars in ingesters\nNumber of TSDB exemplars currently in ingesters' storage.\n\n", - "fill": 1, - "format": "short", - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cortex_ingester_tsdb_exemplar_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n/ on(cluster, namespace) group_left\nmax by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}))\n", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Exemplars in ingesters", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "count(count by(user) (cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Tenants", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 
null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Requests / sec\nThe rate of successful, failed and rejected requests to distributor.\nRejected requests are requests that distributor fails to handle because of distributor instance limits.\nWhen distributor is configured to use \"early\" request rejection, then rejected requests are NOT included in other metrics.\nWhen distributor is not configured to use \"early\" request rejection, then rejected requests are also counted as \"errors\".\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": 
"byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 
4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": 
"desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Requests / sec\nThe rate of successful, failed and rejected requests to ingester.\nRejected requests are requests that ingester fails to handle because of ingester instance limits (ingester-max-inflight-push-requests, ingester-max-inflight-push-requests-bytes, ingester-max-ingestion-rate).\nWhen ingester is configured to use \"early\" request rejection, then rejected requests are NOT included in other metrics.\nWhen ingester is not configured to use \"early\" request rejection, then rejected requests are also counted as \"errors\".\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } 
- } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",route=\"/cortex.Ingester/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - 
"defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - 
} - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } 
- } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 14, - 
"links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor - key-value store for high-availability (HA) deduplication", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - 
"drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": 
"single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor - key-value store for distributors ring", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - 
"mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 18, - 
"links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester - key-value store for the ingesters ring", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Uploaded blocks / sec\nThe rate of blocks being uploaded from the ingesters\nto object storage.\n\n", - 
"fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_shipper_uploads_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) - sum(rate(cortex_ingester_shipper_upload_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_ingester_shipper_upload_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Uploaded blocks / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Upload latency\nThe average, median (50th percentile), and 99th percentile time\nthe ingesters take to upload blocks to object storage.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - 
"spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Upload latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - 
"repeatRowId": null, - "showTitle": true, - "title": "Ingester - shipper", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Compactions per second\nIngesters maintain a local TSDB per-tenant on disk. Each TSDB maintains a head block for each\nactive time series; these blocks get periodically compacted (by default, every 2h).\nThis panel shows the rate of compaction operations across all TSDBs on all ingesters.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_tsdb_compactions_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_ingester_tsdb_compactions_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Compactions / sec", - "type": "timeseries" - }, - { - 
"datasource": "$datasource", - "description": "### Compaction latency\nThe average, median (50th percentile), and 99th percentile time ingesters take to compact TSDB head blocks\non the local filesystem.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Compactions latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - 
"show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester - TSDB head", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### WAL truncations per second\nThe WAL is truncated each time a new TSDB block is written. This panel measures the rate of\ntruncations.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_tsdb_wal_truncations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) - sum(rate(cortex_ingester_tsdb_wal_truncations_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_ingester_tsdb_wal_truncations_failed_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "WAL truncations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Checkpoints created per second\nCheckpoints are created as part of the WAL truncation process.\nThis metric measures the rate of checkpoint creation.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_tsdb_checkpoint_creations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) - sum(rate(cortex_ingester_tsdb_checkpoint_creations_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_ingester_tsdb_checkpoint_creations_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - 
"legendFormat": "failed", - "legendLink": null - } - ], - "title": "Checkpoints created / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### WAL truncations latency (including checkpointing)\nAverage time taken to perform a full WAL truncation,\nincluding the time taken for the checkpointing to complete.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_tsdb_wal_truncate_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\n/\nsum(rate(cortex_ingester_tsdb_wal_truncate_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) >= 0\n", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - } - ], - "title": "WAL truncations latency (includes checkpointing)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "WAL" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } 
- } - ] - }, - { - "matcher": { - "id": "byName", - "options": "mmap-ed chunks" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E28A42", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 26, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_tsdb_wal_corruptions_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "WAL", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_ingester_tsdb_mmap_chunk_corruptions_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "mmap-ed chunks", - "legendLink": null - } - ], - "title": "Corruptions / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester - TSDB write ahead log (WAL)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Distributor exemplars incoming rate\nThe rate of exemplars that have come in to the distributor, including rejected or deduped exemplars.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ex/s" - }, - "overrides": [ ] - }, - "id": 27, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": 
"sum(cluster_namespace_job:cortex_distributor_exemplars_in:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "incoming exemplars", - "legendLink": null - } - ], - "title": "Distributor exemplars incoming rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor exemplars received rate\nThe rate of received exemplars, excluding rejected and deduped exemplars.\nThis number can be sensibly lower than incoming rate because we dedupe the HA sent exemplars, and then reject based on time, see `cortex_discarded_exemplars_total` for specific reasons rates.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ex/s" - }, - "overrides": [ ] - }, - "id": 28, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "received exemplars", - "legendLink": null - } - ], - "title": "Distributor exemplars received rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Ingester ingested exemplars rate\nThe rate of exemplars ingested in the ingesters.\nEvery exemplar is sent to the replication factor number of ingesters, so the sum of rates from all ingesters is divided by the replication factor.\nThis ingested exemplars rate should match the distributor's received exemplars rate.\n\n", - "fieldConfig": { - "defaults": { - 
"custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ex/s" - }, - "overrides": [ ] - }, - "id": 29, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n cluster_namespace_job:cortex_ingester_ingested_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "ingested exemplars", - "legendLink": null - } - ], - "title": "Ingester ingested exemplars rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Ingester appended exemplars rate\nThe rate of exemplars appended in the ingesters.\nThis can be lower than ingested exemplars rate since TSDB does not append the same exemplar twice, and those can be frequent.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ex/s" - }, - "overrides": [ ] - }, - "id": 30, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n cluster_namespace_job:cortex_ingester_tsdb_exemplar_exemplars_appended:rate5m{cluster=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "appended exemplars", - "legendLink": null - } - ], - "title": "Ingester appended exemplars rate", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Exemplars", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 31, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_distributor_instance_rejected_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{reason}}", - "legendLink": null - } - ], - "title": "Rejected distributor requests", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 32, - "links": [ ], - "options": { - 
"legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_ingester_instance_rejected_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{reason}}", - "legendLink": null - } - ], - "title": "Rejected ingester requests", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Instance Limits", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - 
"10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Writes", - "uid": "8280707b8f16e7b87b840fc1cc92d4c5", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-writes.json - namespace: monitoring-system ---- -apiVersion: v1 data: config.yaml: | analytics: @@ -63018,6 +1423,36 @@ metadata: type: Opaque --- apiVersion: v1 +data: + memcached-address: bWVtY2FjaGVkLm1lbWNhY2hlZC1zeXN0ZW0uc3ZjLmNsdXN0ZXIubG9jYWw6MTEyMTE= +kind: Secret +metadata: + name: integrations-memcached + namespace: monitoring-system +type: Opaque +--- +apiVersion: v1 +data: + mysql-host: bXlzcWwubXlzcWwtc3lzdGVtLnN2Yy5jbHVzdGVyLmxvY2Fs + mysql-password: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= + mysql-username: bGd0bXA= +kind: Secret +metadata: + name: integrations-mysql + namespace: monitoring-system +type: Opaque +--- +apiVersion: v1 +data: + redis-addr: cmVkaXMtbWFzdGVyLnJlZGlzLXN5c3RlbS5zdmMuY2x1c3Rlci5sb2NhbDo2Mzc5 + redis-password: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= +kind: Secret +metadata: + name: integrations-redis + namespace: monitoring-system +type: Opaque +--- +apiVersion: v1 data: MIMIR_S3_SECRET_ACCESS_KEY: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= kind: Secret @@ -63132,6 +1567,84 @@ spec: --- apiVersion: v1 kind: Service +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +spec: + internalTrafficPolicy: Cluster + ports: + - name: http-metrics + port: 80 + protocol: TCP + targetPort: 80 + - name: grpc-otlp + port: 4317 + protocol: TCP + targetPort: 4317 + - name: http-otlp + port: 
4318 + protocol: TCP + targetPort: 4318 + - name: zipkin + port: 9411 + protocol: TCP + targetPort: 9411 + - name: jaeger-compact + port: 6831 + protocol: UDP + targetPort: 6831 + selector: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + type: ClusterIP +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent-cluster + namespace: monitoring-system +spec: + clusterIP: None + ports: + - name: http + port: 80 + protocol: TCP + targetPort: 80 + - name: grpc-otlp + port: 4317 + protocol: TCP + targetPort: 4317 + - name: http-otlp + port: 4318 + protocol: TCP + targetPort: 4318 + - name: zipkin + port: 9411 + protocol: TCP + targetPort: 9411 + - name: jaeger-compact + port: 6831 + protocol: UDP + targetPort: 6831 + selector: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + type: ClusterIP +--- +apiVersion: v1 +kind: Service metadata: labels: app.kubernetes.io/component: mimir @@ -63213,6 +1726,7 @@ metadata: app.kubernetes.io/name: pyroscope app.kubernetes.io/version: 1.5.0 helm.sh/chart: pyroscope-1.5.0 + prometheus.io/service-monitor: "false" name: pyroscope-headless namespace: profiles-system spec: @@ -63737,1703 +2251,127 @@ spec: app.kubernetes.io/instance: pyroscope app.kubernetes.io/name: pyroscope --- -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: agent-flow-mixin-alerts - namespace: monitoring-system -spec: - groups: - - name: clustering - rules: - - alert: ClusterNotConverging - annotations: - message: Cluster is not converging. - expr: stddev by (cluster, namespace) (sum without (state) (cluster_node_peers)) - != 0 - for: 5m - - alert: ClusterSplitBrain - annotations: - message: Cluster nodes have entered a split brain state. 
- expr: | - sum without (state) (cluster_node_peers) != - on (cluster, namespace) group_left - count by (cluster, namespace) (cluster_node_info) - for: 5m - - alert: ClusterLamportClockDrift - annotations: - message: Cluster nodes' lamport clocks are not converging. - expr: stddev by (cluster, namespace) (cluster_node_lamport_time) > 4 * sqrt(count - by (cluster, namespace) (cluster_node_info)) - for: 5m - - alert: ClusterNodeUnhealthy - annotations: - message: Cluster node is reporting a health score > 0. - expr: | - cluster_node_gossip_health_score > 0 - for: 5m - - alert: ClusterLamportClockStuck - annotations: - message: Cluster nodes's lamport clocks is not progressing. - expr: | - sum by (cluster, namespace, instance) (rate(cluster_node_lamport_time[2m])) == 0 - and on (cluster, namespace, instance) (cluster_node_peers > 1) - for: 5m - - alert: ClusterNodeNameConflict - annotations: - message: A node tried to join the cluster with a name conflicting with an - existing peer. - expr: sum by (cluster, namespace) (rate(cluster_node_gossip_received_events_total{event="node_conflict"}[2m])) - > 0 - for: 10m - - alert: ClusterNodeStuckTerminating - annotations: - message: Cluster node stuck in Terminating state. - expr: sum by (cluster, namespace, instance) (cluster_node_peers{state="terminating"}) - > 0 - for: 5m - - alert: ClusterConfigurationDrift - annotations: - message: Cluster nodes are not using the same configuration file. - expr: | - count without (sha256) ( - max by (cluster, namespace, sha256) (agent_config_hash and on(cluster, namespace) cluster_node_info) - ) > 1 - for: 5m - - name: agent_controller - rules: - - alert: SlowComponentEvaluations - annotations: - message: Flow component evaluations are taking too long. - expr: sum by (cluster, namespace, component_id) (rate(agent_component_evaluation_slow_seconds[10m])) - > 0 - for: 15m - - alert: UnhealthyComponents - annotations: - message: Unhealthy Flow components detected. 
- expr: sum(agent_component_controller_running_components{health_type!="healthy"}) - > 0 - for: 15m ---- -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: loki-mixin-alerts - namespace: monitoring-system -spec: - groups: - - name: loki_alerts - rules: - - alert: LokiRequestErrors - annotations: - message: | - {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors. - expr: | - 100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[2m])) by (namespace, job, route) - / - sum(rate(loki_request_duration_seconds_count[2m])) by (namespace, job, route) - > 10 - for: 15m - labels: - severity: critical - - alert: LokiRequestPanics - annotations: - message: | - {{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% increase of panics. - expr: | - sum(increase(loki_panic_total[10m])) by (namespace, job) > 0 - labels: - severity: critical - - alert: LokiRequestLatency - annotations: - message: | - {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency. - expr: | - cluster_namespace_job_route:loki_request_duration_seconds:99quantile{route!~"(?i).*tail.*|/schedulerpb.SchedulerForQuerier/QuerierLoop"} > 1 - for: 15m - labels: - severity: critical - - alert: LokiTooManyCompactorsRunning - annotations: - message: | - {{ $labels.cluster }} {{ $labels.namespace }} has had {{ printf "%.0f" $value }} compactors running for more than 5m. Only one compactor should run at a time. 
- expr: | - sum(loki_boltdb_shipper_compactor_running) by (namespace, cluster) > 1 - for: 5m - labels: - severity: warning ---- -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: loki-mixin-rules - namespace: monitoring-system -spec: - groups: - - name: loki_rules - rules: - - expr: histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:loki_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:loki_request_duration_seconds:50quantile - - expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, job) / sum(rate(loki_request_duration_seconds_count[1m])) - by (cluster, job) - record: cluster_job:loki_request_duration_seconds:avg - - expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, cluster, job) - record: cluster_job:loki_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, job) - record: cluster_job:loki_request_duration_seconds_sum:sum_rate - - expr: sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, job) - record: cluster_job:loki_request_duration_seconds_count:sum_rate - - expr: histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m])) - by (le, cluster, job, route)) - record: cluster_job_route:loki_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m])) - by (le, cluster, job, route)) - record: cluster_job_route:loki_request_duration_seconds:50quantile - - expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, job, route) - / sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, job, route) - record: cluster_job_route:loki_request_duration_seconds:avg - - expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, cluster, job, - route) - 
record: cluster_job_route:loki_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, job, route) - record: cluster_job_route:loki_request_duration_seconds_sum:sum_rate - - expr: sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, job, route) - record: cluster_job_route:loki_request_duration_seconds_count:sum_rate - - expr: histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m])) - by (le, cluster, namespace, job, route)) - record: cluster_namespace_job_route:loki_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m])) - by (le, cluster, namespace, job, route)) - record: cluster_namespace_job_route:loki_request_duration_seconds:50quantile - - expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, namespace, - job, route) / sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, - namespace, job, route) - record: cluster_namespace_job_route:loki_request_duration_seconds:avg - - expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, - job, route) - record: cluster_namespace_job_route:loki_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, namespace, - job, route) - record: cluster_namespace_job_route:loki_request_duration_seconds_sum:sum_rate - - expr: sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, namespace, - job, route) - record: cluster_namespace_job_route:loki_request_duration_seconds_count:sum_rate ---- -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule +apiVersion: apps/v1 +kind: DaemonSet metadata: - name: mimir-mixin-alerts + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent namespace: 
monitoring-system spec: - groups: - - name: mimir_alerts - rules: - - alert: MimirIngesterUnhealthy - annotations: - message: Mimir cluster {{ $labels.cluster }}/{{ $labels.namespace }} has {{ - printf "%f" $value }} unhealthy ingester(s). - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterunhealthy - expr: | - min by (cluster, namespace) (cortex_ring_members{state="Unhealthy", name="ingester"}) > 0 - for: 15m - labels: - severity: critical - - alert: MimirRequestErrors - annotations: - message: | - The route {{ $labels.route }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% errors. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrequesterrors - expr: | - 100 * sum by (cluster, namespace, job, route) (rate(cortex_request_duration_seconds_count{status_code=~"5..",route!~"ready|debug_pprof"}[1m])) - / - sum by (cluster, namespace, job, route) (rate(cortex_request_duration_seconds_count{route!~"ready|debug_pprof"}[1m])) - > 1 - for: 15m - labels: - severity: critical - - alert: MimirRequestLatency - annotations: - message: | - {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrequestlatency - expr: | - cluster_namespace_job_route:cortex_request_duration_seconds:99quantile{route!~"metrics|/frontend.Frontend/Process|ready|/schedulerpb.SchedulerForFrontend/FrontendLoop|/schedulerpb.SchedulerForQuerier/QuerierLoop|debug_pprof"} - > - 2.5 - for: 15m - labels: - severity: warning - - alert: MimirQueriesIncorrect - annotations: - message: | - The Mimir cluster {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% incorrect query results. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirqueriesincorrect - expr: | - 100 * sum by (cluster, namespace) (rate(test_exporter_test_case_result_total{result="fail"}[5m])) - / - sum by (cluster, namespace) (rate(test_exporter_test_case_result_total[5m])) > 1 - for: 15m - labels: - severity: warning - - alert: MimirInconsistentRuntimeConfig - annotations: - message: | - An inconsistent runtime config file is used across cluster {{ $labels.cluster }}/{{ $labels.namespace }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirinconsistentruntimeconfig - expr: | - count(count by(cluster, namespace, job, sha256) (cortex_runtime_config_hash)) without(sha256) > 1 - for: 1h - labels: - severity: critical - - alert: MimirBadRuntimeConfig - annotations: - message: | - {{ $labels.job }} failed to reload runtime config. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirbadruntimeconfig - expr: | - # The metric value is reset to 0 on error while reloading the config at runtime. - cortex_runtime_config_last_reload_successful == 0 - for: 5m - labels: - severity: critical - - alert: MimirFrontendQueriesStuck - annotations: - message: | - There are {{ $value }} queued up queries in {{ $labels.cluster }}/{{ $labels.namespace }} {{ $labels.job }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirfrontendqueriesstuck - expr: | - sum by (cluster, namespace, job) (min_over_time(cortex_query_frontend_queue_length[1m])) > 0 - for: 5m - labels: - severity: critical - - alert: MimirSchedulerQueriesStuck - annotations: - message: | - There are {{ $value }} queued up queries in {{ $labels.cluster }}/{{ $labels.namespace }} {{ $labels.job }}. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirschedulerqueriesstuck - expr: | - sum by (cluster, namespace, job) (min_over_time(cortex_query_scheduler_queue_length[1m])) > 0 - for: 7m - labels: - severity: critical - - alert: MimirCacheRequestErrors - annotations: - message: | - The cache {{ $labels.name }} used by Mimir {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% errors for {{ $labels.operation }} operation. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircacherequesterrors - expr: | - ( - sum by(cluster, namespace, name, operation) ( - rate(thanos_memcached_operation_failures_total[1m]) - or - rate(thanos_cache_operation_failures_total[1m]) - ) - / - sum by(cluster, namespace, name, operation) ( - rate(thanos_memcached_operations_total[1m]) - or - rate(thanos_cache_operations_total[1m]) - ) - ) * 100 > 5 - for: 5m - labels: - severity: warning - - alert: MimirIngesterRestarts - annotations: - message: Mimir {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has restarted {{ printf "%.2f" $value }} times in the last 30 mins. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterrestarts - expr: | - ( - sum by(cluster, namespace, pod) ( - increase(kube_pod_container_status_restarts_total{container=~"(ingester|mimir-write)"}[30m]) - ) - >= 2 - ) - and - ( - count by(cluster, namespace, pod) (cortex_build_info) > 0 - ) - labels: - severity: warning - - alert: MimirKVStoreFailure - annotations: - message: | - Mimir {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is failing to talk to the KV store {{ $labels.kv_name }}. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirkvstorefailure - expr: | - ( - sum by(cluster, namespace, pod, status_code, kv_name) (rate(cortex_kv_request_duration_seconds_count{status_code!~"2.+"}[1m])) - / - sum by(cluster, namespace, pod, status_code, kv_name) (rate(cortex_kv_request_duration_seconds_count[1m])) - ) - # We want to get alerted only in case there's a constant failure. - == 1 - for: 5m - labels: - severity: critical - - alert: MimirMemoryMapAreasTooHigh - annotations: - message: '{{ $labels.job }}/{{ $labels.pod }} has a number of mmap-ed areas - close to the limit.' - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirmemorymapareastoohigh - expr: | - process_memory_map_areas{job=~".*/(ingester.*|cortex|mimir|mimir-write.*|store-gateway.*|cortex|mimir|mimir-backend.*)"} / process_memory_map_areas_limit{job=~".*/(ingester.*|cortex|mimir|mimir-write.*|store-gateway.*|cortex|mimir|mimir-backend.*)"} > 0.8 - for: 5m - labels: - severity: critical - - alert: MimirIngesterInstanceHasNoTenants - annotations: - message: Mimir ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has no tenants assigned. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterinstancehasnotenants - expr: | - (min by(cluster, namespace, pod) (cortex_ingester_memory_users) == 0) - and on (cluster, namespace) - # Only if there are more time-series than would be expected due to continuous testing load - ( - sum by(cluster, namespace) (cortex_ingester_memory_series) - / - max by(cluster, namespace) (cortex_distributor_replication_factor) - ) > 100000 - for: 1h - labels: - severity: warning - - alert: MimirRulerInstanceHasNoRuleGroups - annotations: - message: Mimir ruler {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has no rule groups assigned. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulerinstancehasnorulegroups - expr: | - # Alert on ruler instances in microservices mode that have no rule groups assigned, - min by(cluster, namespace, pod) (cortex_ruler_managers_total{pod=~"(.*mimir-)?ruler.*"}) == 0 - # but only if other ruler instances of the same cell do have rule groups assigned - and on (cluster, namespace) - (max by(cluster, namespace) (cortex_ruler_managers_total) > 0) - # and there are more than two instances overall - and on (cluster, namespace) - (count by (cluster, namespace) (cortex_ruler_managers_total) > 2) - for: 1h - labels: - severity: warning - - alert: MimirIngestedDataTooFarInTheFuture - annotations: - message: Mimir ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has ingested samples with timestamps more than 1h in the future. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesteddatatoofarinthefuture - expr: | - max by(cluster, namespace, pod) ( - cortex_ingester_tsdb_head_max_timestamp_seconds - time() - and - cortex_ingester_tsdb_head_max_timestamp_seconds > 0 - ) > 60*60 - for: 5m - labels: - severity: warning - - alert: MimirRingMembersMismatch - annotations: - message: | - Number of members in Mimir ingester hash ring does not match the expected number in {{ $labels.cluster }}/{{ $labels.namespace }}. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirringmembersmismatch - expr: | - ( - avg by(cluster, namespace) (sum by(cluster, namespace, pod) (cortex_ring_members{name="ingester",job=~".*/(ingester.*|cortex|mimir|mimir-write.*)"})) - != sum by(cluster, namespace) (up{job=~".*/(ingester.*|cortex|mimir|mimir-write.*)"}) - ) - and - ( - count by(cluster, namespace) (cortex_build_info) > 0 - ) - for: 15m - labels: - component: ingester - severity: warning - - name: mimir_instance_limits_alerts - rules: - - alert: MimirIngesterReachingSeriesLimit - annotations: - message: | - Ingester {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its series limit. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterreachingserieslimit - expr: | - ( - (cortex_ingester_memory_series / ignoring(limit) cortex_ingester_instance_limits{limit="max_series"}) - and ignoring (limit) - (cortex_ingester_instance_limits{limit="max_series"} > 0) - ) > 0.8 - for: 3h - labels: - severity: warning - - alert: MimirIngesterReachingSeriesLimit - annotations: - message: | - Ingester {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its series limit. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterreachingserieslimit - expr: | - ( - (cortex_ingester_memory_series / ignoring(limit) cortex_ingester_instance_limits{limit="max_series"}) - and ignoring (limit) - (cortex_ingester_instance_limits{limit="max_series"} > 0) - ) > 0.9 - for: 5m - labels: - severity: critical - - alert: MimirIngesterReachingTenantsLimit - annotations: - message: | - Ingester {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its tenant limit. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterreachingtenantslimit - expr: | - ( - (cortex_ingester_memory_users / ignoring(limit) cortex_ingester_instance_limits{limit="max_tenants"}) - and ignoring (limit) - (cortex_ingester_instance_limits{limit="max_tenants"} > 0) - ) > 0.7 - for: 5m - labels: - severity: warning - - alert: MimirIngesterReachingTenantsLimit - annotations: - message: | - Ingester {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its tenant limit. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterreachingtenantslimit - expr: | - ( - (cortex_ingester_memory_users / ignoring(limit) cortex_ingester_instance_limits{limit="max_tenants"}) - and ignoring (limit) - (cortex_ingester_instance_limits{limit="max_tenants"} > 0) - ) > 0.8 - for: 5m - labels: - severity: critical - - alert: MimirReachingTCPConnectionsLimit - annotations: - message: | - Mimir instance {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its TCP connections limit for {{ $labels.protocol }} protocol. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirreachingtcpconnectionslimit - expr: | - cortex_tcp_connections / cortex_tcp_connections_limit > 0.8 and - cortex_tcp_connections_limit > 0 - for: 5m - labels: - severity: critical - - alert: MimirDistributorReachingInflightPushRequestLimit - annotations: - message: | - Distributor {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its inflight push request limit. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirdistributorreachinginflightpushrequestlimit - expr: | - ( - (cortex_distributor_inflight_push_requests / ignoring(limit) cortex_distributor_instance_limits{limit="max_inflight_push_requests"}) - and ignoring (limit) - (cortex_distributor_instance_limits{limit="max_inflight_push_requests"} > 0) - ) > 0.8 - for: 5m - labels: - severity: critical - - name: mimir-rollout-alerts - rules: - - alert: MimirRolloutStuck - annotations: - message: | - The {{ $labels.rollout_group }} rollout is stuck in {{ $labels.cluster }}/{{ $labels.namespace }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrolloutstuck - expr: | - ( - max without (revision) ( - sum without(statefulset) (label_replace(kube_statefulset_status_current_revision, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?")) - unless - sum without(statefulset) (label_replace(kube_statefulset_status_update_revision, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?")) - ) - * - ( - sum without(statefulset) (label_replace(kube_statefulset_replicas, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?")) - != - sum without(statefulset) (label_replace(kube_statefulset_status_replicas_updated, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?")) - ) - ) and ( - changes(sum without(statefulset) (label_replace(kube_statefulset_status_replicas_updated, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?"))[15m:1m]) - == - 0 - ) - * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - for: 30m - labels: - severity: warning - workload_type: statefulset - - alert: MimirRolloutStuck - annotations: - message: | - The {{ $labels.rollout_group }} rollout is stuck in {{ $labels.cluster }}/{{ $labels.namespace }}. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrolloutstuck - expr: | - ( - sum without(deployment) (label_replace(kube_deployment_spec_replicas, "rollout_group", "$1", "deployment", "(.*?)(?:-zone-[a-z])?")) - != - sum without(deployment) (label_replace(kube_deployment_status_replicas_updated, "rollout_group", "$1", "deployment", "(.*?)(?:-zone-[a-z])?")) - ) and ( - changes(sum without(deployment) (label_replace(kube_deployment_status_replicas_updated, "rollout_group", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"))[15m:1m]) - == - 0 - ) - * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - for: 30m - labels: - severity: warning - workload_type: deployment - - alert: RolloutOperatorNotReconciling - annotations: - message: | - Rollout operator is not reconciling the rollout group {{ $labels.rollout_group }} in {{ $labels.cluster }}/{{ $labels.namespace }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#rolloutoperatornotreconciling - expr: | - max by(cluster, namespace, rollout_group) (time() - rollout_operator_last_successful_group_reconcile_timestamp_seconds) > 600 - for: 5m - labels: - severity: critical - - name: mimir-provisioning - rules: - - alert: MimirAllocatingTooMuchMemory - annotations: - message: | - Instance {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is using too much memory. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirallocatingtoomuchmemory - expr: | - ( - # We use RSS instead of working set memory because of the ingester's extensive usage of mmap. - # See: https://github.com/grafana/mimir/issues/2466 - container_memory_rss{container=~"(ingester|mimir-write|mimir-backend)"} - / - ( container_spec_memory_limit_bytes{container=~"(ingester|mimir-write|mimir-backend)"} > 0 ) - ) - # Match only Mimir namespaces. 
- * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - > 0.65 - for: 15m - labels: - severity: warning - - alert: MimirAllocatingTooMuchMemory - annotations: - message: | - Instance {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is using too much memory. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirallocatingtoomuchmemory - expr: | - ( - # We use RSS instead of working set memory because of the ingester's extensive usage of mmap. - # See: https://github.com/grafana/mimir/issues/2466 - container_memory_rss{container=~"(ingester|mimir-write|mimir-backend)"} - / - ( container_spec_memory_limit_bytes{container=~"(ingester|mimir-write|mimir-backend)"} > 0 ) - ) - # Match only Mimir namespaces. - * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - > 0.8 - for: 15m - labels: - severity: critical - - name: ruler_alerts - rules: - - alert: MimirRulerTooManyFailedPushes - annotations: - message: | - Mimir Ruler {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% write (push) errors. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulertoomanyfailedpushes - expr: | - 100 * ( - sum by (cluster, namespace, pod) (rate(cortex_ruler_write_requests_failed_total[1m])) - / - sum by (cluster, namespace, pod) (rate(cortex_ruler_write_requests_total[1m])) - ) > 1 - for: 5m - labels: - severity: critical - - alert: MimirRulerTooManyFailedQueries - annotations: - message: | - Mimir Ruler {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% errors while evaluating rules. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulertoomanyfailedqueries - expr: | - 100 * ( - sum by (cluster, namespace, pod) (rate(cortex_ruler_queries_failed_total[1m])) - / - sum by (cluster, namespace, pod) (rate(cortex_ruler_queries_total[1m])) - ) > 1 - for: 5m - labels: - severity: critical - - alert: MimirRulerMissedEvaluations - annotations: - message: | - Mimir Ruler {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% missed iterations for the rule group {{ $labels.rule_group }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulermissedevaluations - expr: | - 100 * ( - sum by (cluster, namespace, pod, rule_group) (rate(cortex_prometheus_rule_group_iterations_missed_total[1m])) - / - sum by (cluster, namespace, pod, rule_group) (rate(cortex_prometheus_rule_group_iterations_total[1m])) - ) > 1 - for: 5m - labels: - severity: warning - - alert: MimirRulerFailedRingCheck - annotations: - message: | - Mimir Rulers in {{ $labels.cluster }}/{{ $labels.namespace }} are experiencing errors when checking the ring for rule group ownership. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulerfailedringcheck - expr: | - sum by (cluster, namespace, job) (rate(cortex_ruler_ring_check_errors_total[1m])) - > 0 - for: 5m - labels: - severity: critical - - alert: MimirRulerRemoteEvaluationFailing - annotations: - message: | - Mimir rulers in {{ $labels.cluster }}/{{ $labels.namespace }} are failing to perform {{ printf "%.2f" $value }}% of remote evaluations through the ruler-query-frontend. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulerremoteevaluationfailing - expr: | - 100 * ( - sum by (cluster, namespace) (rate(cortex_request_duration_seconds_count{route="/httpgrpc.HTTP/Handle", status_code=~"5..", job=~".*/(ruler-query-frontend.*)"}[5m])) - / - sum by (cluster, namespace) (rate(cortex_request_duration_seconds_count{route="/httpgrpc.HTTP/Handle", job=~".*/(ruler-query-frontend.*)"}[5m])) - ) > 1 - for: 5m - labels: - severity: warning - - name: gossip_alerts - rules: - - alert: MimirGossipMembersTooHigh - annotations: - message: One or more Mimir instances in {{ $labels.cluster }}/{{ $labels.namespace - }} consistently sees a higher than expected number of gossip members. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirgossipmemberstoohigh - expr: | - max by (cluster, namespace) (memberlist_client_cluster_members_count) - > - (sum by (cluster, namespace) (up{job=~".+/(admin-api|alertmanager|compactor.*|distributor|ingester.*|querier.*|ruler|ruler-querier.*|store-gateway.*|cortex|mimir|mimir-write.*|mimir-read.*|mimir-backend.*)"}) + 10) - for: 20m - labels: - severity: warning - - alert: MimirGossipMembersTooLow - annotations: - message: One or more Mimir instances in {{ $labels.cluster }}/{{ $labels.namespace - }} consistently sees a lower than expected number of gossip members. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirgossipmemberstoolow - expr: | - min by (cluster, namespace) (memberlist_client_cluster_members_count) - < - (sum by (cluster, namespace) (up{job=~".+/(admin-api|alertmanager|compactor.*|distributor|ingester.*|querier.*|ruler|ruler-querier.*|store-gateway.*|cortex|mimir|mimir-write.*|mimir-read.*|mimir-backend.*)"}) * 0.5) - for: 20m - labels: - severity: warning - - name: etcd_alerts - rules: - - alert: EtcdAllocatingTooMuchMemory - annotations: - message: | - Too much memory being used by {{ $labels.namespace }}/{{ $labels.pod }} - bump memory limit. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#etcdallocatingtoomuchmemory - expr: | - ( - container_memory_working_set_bytes{container="etcd"} - / - ( container_spec_memory_limit_bytes{container="etcd"} > 0 ) - ) > 0.65 - for: 15m - labels: - severity: warning - - alert: EtcdAllocatingTooMuchMemory - annotations: - message: | - Too much memory being used by {{ $labels.namespace }}/{{ $labels.pod }} - bump memory limit. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#etcdallocatingtoomuchmemory - expr: | - ( - container_memory_working_set_bytes{container="etcd"} - / - ( container_spec_memory_limit_bytes{container="etcd"} > 0 ) - ) > 0.8 - for: 15m - labels: - severity: critical - - name: alertmanager_alerts - rules: - - alert: MimirAlertmanagerSyncConfigsFailing - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is failing to read tenant configurations from storage. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagersyncconfigsfailing - expr: | - rate(cortex_alertmanager_sync_configs_failed_total[5m]) > 0 - for: 30m - labels: - severity: critical - - alert: MimirAlertmanagerRingCheckFailing - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is unable to check tenants ownership via the ring. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerringcheckfailing - expr: | - rate(cortex_alertmanager_ring_check_errors_total[2m]) > 0 - for: 10m - labels: - severity: critical - - alert: MimirAlertmanagerPartialStateMergeFailing - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is failing to merge partial state changes received from a replica. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerpartialstatemergefailing - expr: | - rate(cortex_alertmanager_partial_state_merges_failed_total[2m]) > 0 - for: 10m - labels: - severity: critical - - alert: MimirAlertmanagerReplicationFailing - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is failing to replicating partial state to its replicas. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerreplicationfailing - expr: | - rate(cortex_alertmanager_state_replication_failed_total[2m]) > 0 - for: 10m - labels: - severity: critical - - alert: MimirAlertmanagerPersistStateFailing - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is unable to persist full state snaphots to remote storage. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerpersiststatefailing - expr: | - rate(cortex_alertmanager_state_persist_failed_total[15m]) > 0 - for: 1h - labels: - severity: critical - - alert: MimirAlertmanagerInitialSyncFailed - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} was unable to obtain some initial state when starting up. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerinitialsyncfailed - expr: | - increase(cortex_alertmanager_state_initial_sync_completed_total{outcome="failed"}[1m]) > 0 - labels: - severity: critical - - alert: MimirAlertmanagerAllocatingTooMuchMemory - annotations: - message: | - Alertmanager {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is using too much memory. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerallocatingtoomuchmemory - expr: | - (container_memory_working_set_bytes{container="alertmanager"} / container_spec_memory_limit_bytes{container="alertmanager"}) > 0.80 - and - (container_spec_memory_limit_bytes{container="alertmanager"} > 0) - for: 15m - labels: - severity: warning - - alert: MimirAlertmanagerAllocatingTooMuchMemory - annotations: - message: | - Alertmanager {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is using too much memory. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerallocatingtoomuchmemory - expr: | - (container_memory_working_set_bytes{container="alertmanager"} / container_spec_memory_limit_bytes{container="alertmanager"}) > 0.90 - and - (container_spec_memory_limit_bytes{container="alertmanager"} > 0) - for: 15m - labels: - severity: critical - - alert: MimirAlertmanagerInstanceHasNoTenants - annotations: - message: Mimir alertmanager {{ $labels.pod }} in {{ $labels.cluster }}/{{ - $labels.namespace }} owns no tenants. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerinstancehasnotenants - expr: | - # Alert on alertmanager instances in microservices mode that own no tenants, - min by(cluster, namespace, pod) (cortex_alertmanager_tenants_owned{pod=~"(.*mimir-)?alertmanager.*"}) == 0 - # but only if other instances of the same cell do have tenants assigned. - and on (cluster, namespace) - max by(cluster, namespace) (cortex_alertmanager_tenants_owned) > 0 - for: 1h - labels: - severity: warning - - name: mimir_blocks_alerts - rules: - - alert: MimirIngesterHasNotShippedBlocks - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not shipped any block in the last 4 hours. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterhasnotshippedblocks - expr: | - (min by(cluster, namespace, pod) (time() - cortex_ingester_shipper_last_successful_upload_timestamp_seconds) > 60 * 60 * 4) - and - (max by(cluster, namespace, pod) (cortex_ingester_shipper_last_successful_upload_timestamp_seconds) > 0) - and - # Only if the ingester has ingested samples over the last 4h. - (max by(cluster, namespace, pod) (max_over_time(cluster_namespace_pod:cortex_ingester_ingested_samples_total:rate1m[4h])) > 0) - and - # Only if the ingester was ingesting samples 4h ago. 
This protects against the case where the ingester replica - # had ingested samples in the past, then no traffic was received for a long period and then it starts - # receiving samples again. Without this check, the alert would fire as soon as it gets back receiving - # samples, while the a block shipping is expected within the next 4h. - (max by(cluster, namespace, pod) (max_over_time(cluster_namespace_pod:cortex_ingester_ingested_samples_total:rate1m[1h] offset 4h)) > 0) - for: 15m - labels: - severity: critical - - alert: MimirIngesterHasNotShippedBlocksSinceStart - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not shipped any block in the last 4 hours. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterhasnotshippedblockssincestart - expr: | - (max by(cluster, namespace, pod) (cortex_ingester_shipper_last_successful_upload_timestamp_seconds) == 0) - and - (max by(cluster, namespace, pod) (max_over_time(cluster_namespace_pod:cortex_ingester_ingested_samples_total:rate1m[4h])) > 0) - for: 4h - labels: - severity: critical - - alert: MimirIngesterHasUnshippedBlocks - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has compacted a block {{ $value | humanizeDuration }} ago but it hasn't - been successfully uploaded to the storage yet. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterhasunshippedblocks - expr: | - (time() - cortex_ingester_oldest_unshipped_block_timestamp_seconds > 3600) - and - (cortex_ingester_oldest_unshipped_block_timestamp_seconds > 0) - for: 15m - labels: - severity: critical - - alert: MimirIngesterTSDBHeadCompactionFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to compact TSDB head. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbheadcompactionfailed - expr: | - rate(cortex_ingester_tsdb_compactions_failed_total[5m]) > 0 - for: 15m - labels: - severity: critical - - alert: MimirIngesterTSDBHeadTruncationFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to truncate TSDB head. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbheadtruncationfailed - expr: | - rate(cortex_ingester_tsdb_head_truncations_failed_total[5m]) > 0 - labels: - severity: critical - - alert: MimirIngesterTSDBCheckpointCreationFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to create TSDB checkpoint. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbcheckpointcreationfailed - expr: | - rate(cortex_ingester_tsdb_checkpoint_creations_failed_total[5m]) > 0 - labels: - severity: critical - - alert: MimirIngesterTSDBCheckpointDeletionFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to delete TSDB checkpoint. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbcheckpointdeletionfailed - expr: | - rate(cortex_ingester_tsdb_checkpoint_deletions_failed_total[5m]) > 0 - labels: - severity: critical - - alert: MimirIngesterTSDBWALTruncationFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to truncate TSDB WAL. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbwaltruncationfailed - expr: | - rate(cortex_ingester_tsdb_wal_truncations_failed_total[5m]) > 0 - labels: - severity: warning - - alert: MimirIngesterTSDBWALCorrupted - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} got a corrupted TSDB WAL. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbwalcorrupted - expr: | - # alert when there are more than one corruptions - count by (cluster, namespace) (rate(cortex_ingester_tsdb_wal_corruptions_total[5m]) > 0) > 1 - and - # and there is only one zone - count by (cluster, namespace) (group by (cluster, namespace, job) (cortex_ingester_tsdb_wal_corruptions_total)) == 1 - labels: - deployment: single-zone - severity: critical - - alert: MimirIngesterTSDBWALCorrupted - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} got a corrupted TSDB WAL. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbwalcorrupted - expr: | - # alert when there are more than one corruptions - count by (cluster, namespace) (sum by (cluster, namespace, job) (rate(cortex_ingester_tsdb_wal_corruptions_total[5m]) > 0)) > 1 - and - # and there are multiple zones - count by (cluster, namespace) (group by (cluster, namespace, job) (cortex_ingester_tsdb_wal_corruptions_total)) > 1 - labels: - deployment: multi-zone - severity: critical - - alert: MimirIngesterTSDBWALWritesFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to write to TSDB WAL. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbwalwritesfailed - expr: | - rate(cortex_ingester_tsdb_wal_writes_failed_total[1m]) > 0 - for: 3m - labels: - severity: critical - - alert: MimirStoreGatewayHasNotSyncTheBucket - annotations: - message: Mimir store-gateway {{ $labels.pod }} in {{ $labels.cluster }}/{{ - $labels.namespace }} has not successfully synched the bucket since {{ $value - | humanizeDuration }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirstoregatewayhasnotsyncthebucket - expr: | - (time() - cortex_bucket_stores_blocks_last_successful_sync_timestamp_seconds{component="store-gateway"} > 60 * 30) - and - cortex_bucket_stores_blocks_last_successful_sync_timestamp_seconds{component="store-gateway"} > 0 - for: 5m - labels: - severity: critical - - alert: MimirStoreGatewayNoSyncedTenants - annotations: - message: Mimir store-gateway {{ $labels.pod }} in {{ $labels.cluster }}/{{ - $labels.namespace }} is not syncing any blocks for any tenant. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirstoregatewaynosyncedtenants - expr: | - min by(cluster, namespace, pod) (cortex_bucket_stores_tenants_synced{component="store-gateway"}) == 0 - for: 1h - labels: - severity: warning - - alert: MimirBucketIndexNotUpdated - annotations: - message: Mimir bucket index for tenant {{ $labels.user }} in {{ $labels.cluster - }}/{{ $labels.namespace }} has not been updated since {{ $value | humanizeDuration - }}. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirbucketindexnotupdated - expr: | - min by(cluster, namespace, user) (time() - cortex_bucket_index_last_successful_update_timestamp_seconds) > 7200 - labels: - severity: critical - - name: mimir_compactor_alerts - rules: - - alert: MimirCompactorHasNotSuccessfullyCleanedUpBlocks - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not successfully cleaned up blocks in the last 6 hours. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotsuccessfullycleanedupblocks - expr: | - # The "last successful run" metric is updated even if the compactor owns no tenants, - # so this alert correctly doesn't fire if compactor has nothing to do. - (time() - cortex_compactor_block_cleanup_last_successful_run_timestamp_seconds > 60 * 60 * 6) - for: 1h - labels: - severity: critical - - alert: MimirCompactorHasNotSuccessfullyRunCompaction - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not run compaction in the last 24 hours. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotsuccessfullyruncompaction - expr: | - # The "last successful run" metric is updated even if the compactor owns no tenants, - # so this alert correctly doesn't fire if compactor has nothing to do. - (time() - cortex_compactor_last_successful_run_timestamp_seconds > 60 * 60 * 24) - and - (cortex_compactor_last_successful_run_timestamp_seconds > 0) - for: 1h - labels: - reason: in-last-24h - severity: critical - - alert: MimirCompactorHasNotSuccessfullyRunCompaction - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not run compaction in the last 24 hours. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotsuccessfullyruncompaction - expr: | - # The "last successful run" metric is updated even if the compactor owns no tenants, - # so this alert correctly doesn't fire if compactor has nothing to do. - cortex_compactor_last_successful_run_timestamp_seconds == 0 - for: 24h - labels: - reason: since-startup - severity: critical - - alert: MimirCompactorHasNotSuccessfullyRunCompaction - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} failed to run 2 consecutive compactions. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotsuccessfullyruncompaction - expr: | - increase(cortex_compactor_runs_failed_total{reason!="shutdown"}[2h]) >= 2 - labels: - reason: consecutive-failures - severity: critical - - alert: MimirCompactorHasNotUploadedBlocks - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not uploaded any block in the last 24 hours. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotuploadedblocks - expr: | - (time() - (max by(cluster, namespace, pod) (thanos_objstore_bucket_last_successful_upload_time{component="compactor"})) > 60 * 60 * 24) - and - (max by(cluster, namespace, pod) (thanos_objstore_bucket_last_successful_upload_time{component="compactor"}) > 0) - and - # Only if some compactions have started. We don't want to fire this alert if the compactor has nothing to do - # (e.g. there are more replicas than required because running as part of mimir-backend). 
- (sum by(cluster, namespace, pod) (rate(cortex_compactor_group_compaction_runs_started_total[24h])) > 0) - for: 15m - labels: - severity: critical - time_period: 24h - - alert: MimirCompactorHasNotUploadedBlocks - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not uploaded any block since its start. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotuploadedblocks - expr: | - (max by(cluster, namespace, pod) (thanos_objstore_bucket_last_successful_upload_time{component="compactor"}) == 0) - and - # Only if some compactions have started. We don't want to fire this alert if the compactor has nothing to do - # (e.g. there are more replicas than required because running as part of mimir-backend). - (sum by(cluster, namespace, pod) (rate(cortex_compactor_group_compaction_runs_started_total[24h])) > 0) - for: 24h - labels: - severity: critical - time_period: since-start - - alert: MimirCompactorSkippedUnhealthyBlocks - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has found and ignored unhealthy blocks. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorskippedunhealthyblocks - expr: | - increase(cortex_compactor_blocks_marked_for_no_compaction_total[5m]) > 0 - for: 1m - labels: - severity: warning - - alert: MimirCompactorSkippedUnhealthyBlocks - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has found and ignored unhealthy blocks. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorskippedunhealthyblocks - expr: | - increase(cortex_compactor_blocks_marked_for_no_compaction_total[5m]) > 1 - for: 30m - labels: - severity: critical - - name: mimir_autoscaling - rules: - - alert: MimirAutoscalerNotActive - annotations: - message: The Horizontal Pod Autoscaler (HPA) {{ $labels.horizontalpodautoscaler - }} in {{ $labels.namespace }} is not active. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirautoscalernotactive - expr: | - ( - label_replace(( - kube_horizontalpodautoscaler_status_condition{condition="ScalingActive",status="false"} - # Match only Mimir namespaces. - * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - # Add "metric" label. - + on(cluster, namespace, horizontalpodautoscaler) group_right label_replace(kube_horizontalpodautoscaler_spec_target_metric*0, "metric", "$1", "metric_name", "(.+)") - > 0), - "scaledObject", "$1", "horizontalpodautoscaler", "keda-hpa-(.*)" - ) - ) - # Alert only if the scaling metric exists and is > 0. If the KEDA ScaledObject is configured to scale down 0, - # then HPA ScalingActive may be false when expected to run 0 replicas. In this case, the scaling metric exported - # by KEDA could not exist at all or being exposed with a value of 0. - and on (cluster, namespace, metric, scaledObject) - (label_replace(keda_scaler_metrics_value, "namespace", "$0", "exported_namespace", ".+") > 0) - for: 1h - labels: - severity: critical - - alert: MimirAutoscalerKedaFailing - annotations: - message: The Keda ScaledObject {{ $labels.scaledObject }} in {{ $labels.namespace - }} is experiencing errors. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirautoscalerkedafailing - expr: | - ( - # Find KEDA scalers reporting errors. 
- label_replace(rate(keda_scaler_errors[5m]), "namespace", "$1", "exported_namespace", "(.*)") - # Match only Mimir namespaces. - * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - ) - > 0 - for: 1h - labels: - severity: critical - - name: mimir_continuous_test - rules: - - alert: MimirContinuousTestNotRunningOnWrites - annotations: - message: Mimir continuous test {{ $labels.test }} in {{ $labels.cluster }}/{{ - $labels.namespace }} is not effectively running because writes are failing. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircontinuoustestnotrunningonwrites - expr: | - sum by(cluster, namespace, test) (rate(mimir_continuous_test_writes_failed_total[5m])) > 0 - for: 1h - labels: - severity: warning - - alert: MimirContinuousTestNotRunningOnReads - annotations: - message: Mimir continuous test {{ $labels.test }} in {{ $labels.cluster }}/{{ - $labels.namespace }} is not effectively running because queries are failing. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircontinuoustestnotrunningonreads - expr: | - sum by(cluster, namespace, test) (rate(mimir_continuous_test_queries_failed_total[5m])) > 0 - for: 1h - labels: - severity: warning - - alert: MimirContinuousTestFailed + minReadySeconds: 10 + selector: + matchLabels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + template: + metadata: annotations: - message: Mimir continuous test {{ $labels.test }} in {{ $labels.cluster }}/{{ - $labels.namespace }} failed when asserting query results. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircontinuoustestfailed - expr: | - sum by(cluster, namespace, test) (rate(mimir_continuous_test_query_result_checks_failed_total[10m])) > 0 - labels: - severity: warning ---- -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: mimir-mixin-rules - namespace: monitoring-system -spec: - groups: - - name: mimir_api_1 - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_request_duration_seconds:50quantile - - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job) / - sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job) - record: cluster_job:cortex_request_duration_seconds:avg - - expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, - job) - record: cluster_job:cortex_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job) - record: cluster_job:cortex_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job) - record: cluster_job:cortex_request_duration_seconds_count:sum_rate - - expr: sum(rate(cortex_request_duration_seconds[1m])) by (cluster, job) - record: cluster_job:cortex_request_duration_seconds:sum_rate - - name: mimir_api_2 - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, job, route)) - record: cluster_job_route:cortex_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, job, route)) - record: 
cluster_job_route:cortex_request_duration_seconds:50quantile - - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job, route) - / sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job, route) - record: cluster_job_route:cortex_request_duration_seconds:avg - - expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, - job, route) - record: cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job, route) - record: cluster_job_route:cortex_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job, - route) - record: cluster_job_route:cortex_request_duration_seconds_count:sum_rate - - expr: sum(rate(cortex_request_duration_seconds[1m])) by (cluster, job, route) - record: cluster_job_route:cortex_request_duration_seconds:sum_rate - - name: mimir_api_3 - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, namespace, job, route)) - record: cluster_namespace_job_route:cortex_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, namespace, job, route)) - record: cluster_namespace_job_route:cortex_request_duration_seconds:50quantile - - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, namespace, - job, route) / sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, - namespace, job, route) - record: cluster_namespace_job_route:cortex_request_duration_seconds:avg - - expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, - namespace, job, route) - record: cluster_namespace_job_route:cortex_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, namespace, - job, route) - record: 
cluster_namespace_job_route:cortex_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, namespace, - job, route) - record: cluster_namespace_job_route:cortex_request_duration_seconds_count:sum_rate - - expr: sum(rate(cortex_request_duration_seconds[1m])) by (cluster, namespace, - job, route) - record: cluster_namespace_job_route:cortex_request_duration_seconds:sum_rate - - name: mimir_querier_api - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_querier_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_querier_request_duration_seconds:50quantile - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - job) / sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, - job) - record: cluster_job:cortex_querier_request_duration_seconds:avg - - expr: sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, - cluster, job) - record: cluster_job:cortex_querier_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - job) - record: cluster_job:cortex_querier_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, - job) - record: cluster_job:cortex_querier_request_duration_seconds_count:sum_rate - - expr: histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, job, route)) - record: cluster_job_route:cortex_querier_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, job, route)) - record: 
cluster_job_route:cortex_querier_request_duration_seconds:50quantile - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - job, route) / sum(rate(cortex_querier_request_duration_seconds_count[1m])) - by (cluster, job, route) - record: cluster_job_route:cortex_querier_request_duration_seconds:avg - - expr: sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, - cluster, job, route) - record: cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - job, route) - record: cluster_job_route:cortex_querier_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, - job, route) - record: cluster_job_route:cortex_querier_request_duration_seconds_count:sum_rate - - expr: histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, namespace, job, route)) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, namespace, job, route)) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds:50quantile - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - namespace, job, route) / sum(rate(cortex_querier_request_duration_seconds_count[1m])) - by (cluster, namespace, job, route) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds:avg - - expr: sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, - cluster, namespace, job, route) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - namespace, job, route) - record: 
cluster_namespace_job_route:cortex_querier_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, - namespace, job, route) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds_count:sum_rate - - name: mimir_storage - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_kv_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_kv_request_duration_seconds:50quantile - - expr: sum(rate(cortex_kv_request_duration_seconds_sum[1m])) by (cluster, job) - / sum(rate(cortex_kv_request_duration_seconds_count[1m])) by (cluster, job) - record: cluster_job:cortex_kv_request_duration_seconds:avg - - expr: sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) by (le, cluster, - job) - record: cluster_job:cortex_kv_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_kv_request_duration_seconds_sum[1m])) by (cluster, job) - record: cluster_job:cortex_kv_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_kv_request_duration_seconds_count[1m])) by (cluster, job) - record: cluster_job:cortex_kv_request_duration_seconds_count:sum_rate - - name: mimir_queries - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_query_frontend_retries_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_query_frontend_retries:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_query_frontend_retries_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_query_frontend_retries:50quantile - - expr: sum(rate(cortex_query_frontend_retries_sum[1m])) by (cluster, job) / sum(rate(cortex_query_frontend_retries_count[1m])) - by (cluster, job) - record: cluster_job:cortex_query_frontend_retries:avg - - expr: 
sum(rate(cortex_query_frontend_retries_bucket[1m])) by (le, cluster, job) - record: cluster_job:cortex_query_frontend_retries_bucket:sum_rate - - expr: sum(rate(cortex_query_frontend_retries_sum[1m])) by (cluster, job) - record: cluster_job:cortex_query_frontend_retries_sum:sum_rate - - expr: sum(rate(cortex_query_frontend_retries_count[1m])) by (cluster, job) - record: cluster_job:cortex_query_frontend_retries_count:sum_rate - - expr: histogram_quantile(0.99, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_query_frontend_queue_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_query_frontend_queue_duration_seconds:50quantile - - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_sum[1m])) by (cluster, - job) / sum(rate(cortex_query_frontend_queue_duration_seconds_count[1m])) by - (cluster, job) - record: cluster_job:cortex_query_frontend_queue_duration_seconds:avg - - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) by - (le, cluster, job) - record: cluster_job:cortex_query_frontend_queue_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_sum[1m])) by (cluster, - job) - record: cluster_job:cortex_query_frontend_queue_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_count[1m])) by (cluster, - job) - record: cluster_job:cortex_query_frontend_queue_duration_seconds_count:sum_rate - - name: mimir_ingester_queries - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_series_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_series:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_series_bucket[1m])) - by (le, cluster, job)) - record: 
cluster_job:cortex_ingester_queried_series:50quantile - - expr: sum(rate(cortex_ingester_queried_series_sum[1m])) by (cluster, job) / - sum(rate(cortex_ingester_queried_series_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_series:avg - - expr: sum(rate(cortex_ingester_queried_series_bucket[1m])) by (le, cluster, - job) - record: cluster_job:cortex_ingester_queried_series_bucket:sum_rate - - expr: sum(rate(cortex_ingester_queried_series_sum[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_series_sum:sum_rate - - expr: sum(rate(cortex_ingester_queried_series_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_series_count:sum_rate - - expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_samples_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_samples:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_samples_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_samples:50quantile - - expr: sum(rate(cortex_ingester_queried_samples_sum[1m])) by (cluster, job) / - sum(rate(cortex_ingester_queried_samples_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_samples:avg - - expr: sum(rate(cortex_ingester_queried_samples_bucket[1m])) by (le, cluster, - job) - record: cluster_job:cortex_ingester_queried_samples_bucket:sum_rate - - expr: sum(rate(cortex_ingester_queried_samples_sum[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_samples_sum:sum_rate - - expr: sum(rate(cortex_ingester_queried_samples_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_samples_count:sum_rate - - expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_exemplars_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_exemplars:99quantile - - expr: histogram_quantile(0.50, 
sum(rate(cortex_ingester_queried_exemplars_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_exemplars:50quantile - - expr: sum(rate(cortex_ingester_queried_exemplars_sum[1m])) by (cluster, job) - / sum(rate(cortex_ingester_queried_exemplars_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_exemplars:avg - - expr: sum(rate(cortex_ingester_queried_exemplars_bucket[1m])) by (le, cluster, - job) - record: cluster_job:cortex_ingester_queried_exemplars_bucket:sum_rate - - expr: sum(rate(cortex_ingester_queried_exemplars_sum[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_exemplars_sum:sum_rate - - expr: sum(rate(cortex_ingester_queried_exemplars_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_exemplars_count:sum_rate - - name: mimir_received_samples - rules: - - expr: | - sum by (cluster, namespace, job) (rate(cortex_distributor_received_samples_total[5m])) - record: cluster_namespace_job:cortex_distributor_received_samples:rate5m - - name: mimir_exemplars_in - rules: - - expr: | - sum by (cluster, namespace, job) (rate(cortex_distributor_exemplars_in_total[5m])) - record: cluster_namespace_job:cortex_distributor_exemplars_in:rate5m - - name: mimir_received_exemplars - rules: - - expr: | - sum by (cluster, namespace, job) (rate(cortex_distributor_received_exemplars_total[5m])) - record: cluster_namespace_job:cortex_distributor_received_exemplars:rate5m - - name: mimir_exemplars_ingested - rules: - - expr: | - sum by (cluster, namespace, job) (rate(cortex_ingester_ingested_exemplars_total[5m])) - record: cluster_namespace_job:cortex_ingester_ingested_exemplars:rate5m - - name: mimir_exemplars_appended - rules: - - expr: | - sum by (cluster, namespace, job) (rate(cortex_ingester_tsdb_exemplar_exemplars_appended_total[5m])) - record: cluster_namespace_job:cortex_ingester_tsdb_exemplar_exemplars_appended:rate5m - - name: mimir_scaling_rules - rules: - - expr: | - # 
Convenience rule to get the number of replicas for both a deployment and a statefulset. - # Multi-zone deployments are grouped together removing the "zone-X" suffix. - sum by (cluster, namespace, deployment) ( - label_replace( - kube_deployment_spec_replicas, - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - or - sum by (cluster, namespace, deployment) ( - label_replace(kube_statefulset_replicas, "deployment", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?") - ) - record: cluster_namespace_deployment:actual_replicas:count - - expr: | - ceil( - quantile_over_time(0.99, - sum by (cluster, namespace) ( - cluster_namespace_job:cortex_distributor_received_samples:rate5m - )[24h:] - ) - / 240000 - ) - labels: - deployment: distributor - reason: sample_rate - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - sum by (cluster, namespace) (cortex_limits_overrides{limit_name="ingestion_rate"}) - * 0.59999999999999998 / 240000 - ) - labels: - deployment: distributor - reason: sample_rate_limits - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - quantile_over_time(0.99, - sum by (cluster, namespace) ( - cluster_namespace_job:cortex_distributor_received_samples:rate5m - )[24h:] - ) - * 3 / 80000 - ) - labels: - deployment: ingester - reason: sample_rate - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - quantile_over_time(0.99, - sum by(cluster, namespace) ( - cortex_ingester_memory_series - )[24h:] - ) - / 1500000 - ) - labels: - deployment: ingester - reason: active_series - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - sum by (cluster, namespace) (cortex_limits_overrides{limit_name="max_global_series_per_user"}) - * 3 * 0.59999999999999998 / 1500000 - 
) - labels: - deployment: ingester - reason: active_series_limits - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - sum by (cluster, namespace) (cortex_limits_overrides{limit_name="ingestion_rate"}) - * 0.59999999999999998 / 80000 - ) - labels: - deployment: ingester - reason: sample_rate_limits - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - (sum by (cluster, namespace) ( - cortex_ingester_tsdb_storage_blocks_bytes{job=~".+/ingester.*"} - ) / 4) - / - avg by (cluster, namespace) ( - memcached_limit_bytes{job=~".+/memcached"} - ) - ) - labels: - deployment: memcached - reason: active_series - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - sum by (cluster, namespace, pod)(rate(container_cpu_usage_seconds_total[1m])), - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - record: cluster_namespace_deployment:container_cpu_usage_seconds_total:sum_rate - - expr: | - # Convenience rule to get the CPU request for both a deployment and a statefulset. - # Multi-zone deployments are grouped together removing the "zone-X" suffix. 
- # This recording rule is made compatible with the breaking changes introduced in kube-state-metrics v2 - # that remove resource metrics, ref: - # - https://github.com/kubernetes/kube-state-metrics/blob/master/CHANGELOG.md#v200-alpha--2020-09-16 - # - https://github.com/kubernetes/kube-state-metrics/pull/1004 - # - # This is the old expression, compatible with kube-state-metrics < v2.0.0, - # where kube_pod_container_resource_requests_cpu_cores was removed: - ( - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - kube_pod_container_resource_requests_cpu_cores, - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - ) - or - # This expression is compatible with kube-state-metrics >= v1.4.0, - # where kube_pod_container_resource_requests was introduced. - ( - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - kube_pod_container_resource_requests{resource="cpu"}, - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - ) - record: cluster_namespace_deployment:kube_pod_container_resource_requests_cpu_cores:sum - - expr: | - # Jobs should be sized to their CPU usage. - # We do this by comparing 99th percentile usage over the last 24hrs to - # their current provisioned #replicas and resource requests. 
- ceil( - cluster_namespace_deployment:actual_replicas:count - * - quantile_over_time(0.99, cluster_namespace_deployment:container_cpu_usage_seconds_total:sum_rate[24h]) - / - cluster_namespace_deployment:kube_pod_container_resource_requests_cpu_cores:sum - ) - labels: - reason: cpu_usage - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - # Convenience rule to get the Memory utilization for both a deployment and a statefulset. - # Multi-zone deployments are grouped together removing the "zone-X" suffix. - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - container_memory_usage_bytes{image!=""}, - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - record: cluster_namespace_deployment:container_memory_usage_bytes:sum - - expr: | - # Convenience rule to get the Memory request for both a deployment and a statefulset. - # Multi-zone deployments are grouped together removing the "zone-X" suffix. 
- # This recording rule is made compatible with the breaking changes introduced in kube-state-metrics v2 - # that remove resource metrics, ref: - # - https://github.com/kubernetes/kube-state-metrics/blob/master/CHANGELOG.md#v200-alpha--2020-09-16 - # - https://github.com/kubernetes/kube-state-metrics/pull/1004 - # - # This is the old expression, compatible with kube-state-metrics < v2.0.0, - # where kube_pod_container_resource_requests_memory_bytes was removed: - ( - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - kube_pod_container_resource_requests_memory_bytes, - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - ) - or - # This expression is compatible with kube-state-metrics >= v1.4.0, - # where kube_pod_container_resource_requests was introduced. - ( - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - kube_pod_container_resource_requests{resource="memory"}, - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - ) - record: cluster_namespace_deployment:kube_pod_container_resource_requests_memory_bytes:sum - - expr: | - # Jobs should be sized to their Memory usage. - # We do this by comparing 99th percentile usage over the last 24hrs to - # their current provisioned #replicas and resource requests. 
- ceil( - cluster_namespace_deployment:actual_replicas:count - * - quantile_over_time(0.99, cluster_namespace_deployment:container_memory_usage_bytes:sum[24h]) - / - cluster_namespace_deployment:kube_pod_container_resource_requests_memory_bytes:sum - ) + kubectl.kubernetes.io/default-container: grafana-agent + logs.agent.grafana.com/scrape: "true" + logs.agent.grafana.com/scrub-level: debug + profiles.grafana.com/cpu.port_name: http-metrics + profiles.grafana.com/cpu.scrape: "false" + profiles.grafana.com/goroutine.port_name: http-metrics + profiles.grafana.com/goroutine.scrape: "false" + profiles.grafana.com/memory.port_name: http-metrics + profiles.grafana.com/memory.scrape: "false" + pyroscope.io/service_name: grafana-agent labels: - reason: memory_usage - record: cluster_namespace_deployment_reason:required_replicas:count - - name: mimir_alertmanager_rules - rules: - - expr: | - sum by (cluster, job, pod) (cortex_alertmanager_alerts) - record: cluster_job_pod:cortex_alertmanager_alerts:sum - - expr: | - sum by (cluster, job, pod) (cortex_alertmanager_silences) - record: cluster_job_pod:cortex_alertmanager_silences:sum - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_alerts_received_total[5m])) - record: cluster_job:cortex_alertmanager_alerts_received_total:rate5m - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_alerts_invalid_total[5m])) - record: cluster_job:cortex_alertmanager_alerts_invalid_total:rate5m - - expr: | - sum by (cluster, job, integration) (rate(cortex_alertmanager_notifications_total[5m])) - record: cluster_job_integration:cortex_alertmanager_notifications_total:rate5m - - expr: | - sum by (cluster, job, integration) (rate(cortex_alertmanager_notifications_failed_total[5m])) - record: cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_state_replication_total[5m])) - record: 
cluster_job:cortex_alertmanager_state_replication_total:rate5m - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_state_replication_failed_total[5m])) - record: cluster_job:cortex_alertmanager_state_replication_failed_total:rate5m - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_partial_state_merges_total[5m])) - record: cluster_job:cortex_alertmanager_partial_state_merges_total:rate5m - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_partial_state_merges_failed_total[5m])) - record: cluster_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m - - name: mimir_ingester_rules - rules: - - expr: | - sum by(cluster, namespace, pod) (rate(cortex_ingester_ingested_samples_total[1m])) - record: cluster_namespace_pod:cortex_ingester_ingested_samples_total:rate1m + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + spec: + containers: + - args: + - run + - /etc/agent/config.river + - --storage.path=/tmp/agent + - --server.http.listen-addr=0.0.0.0:80 + - --server.http.ui-path-prefix=/ + - --disable-reporting + - --cluster.enabled=true + - --cluster.join-addresses=grafana-agent-cluster + env: + - name: AGENT_MODE + value: flow + - name: AGENT_DEPLOY_MODE + value: helm + - name: HOSTNAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + envFrom: + - secretRef: + name: agent-env + optional: true + image: docker.io/grafana/agent:v0.40.3 + imagePullPolicy: IfNotPresent + name: grafana-agent + ports: + - containerPort: 80 + name: http-metrics + - containerPort: 4317 + name: grpc-otlp + protocol: TCP + - containerPort: 4318 + name: http-otlp + protocol: TCP + - containerPort: 9411 + name: zipkin + protocol: TCP + - containerPort: 6831 + name: jaeger-compact + protocol: UDP + readinessProbe: + httpGet: + path: /-/ready + port: 80 + scheme: HTTP + initialDelaySeconds: 10 + timeoutSeconds: 1 + volumeMounts: + - mountPath: /etc/agent + name: config + - mountPath: /var/log + name: varlog + readOnly: 
true + - mountPath: /etc/agent-modules + name: agent-modules + - args: + - --volume-dir=/etc/agent + - --webhook-url=http://localhost:80/-/reload + image: ghcr.io/jimmidyson/configmap-reload:v0.12.0 + name: config-reloader + resources: + requests: + cpu: 1m + memory: 5Mi + volumeMounts: + - mountPath: /etc/agent + name: config + dnsPolicy: ClusterFirst + nodeSelector: + kubernetes.io/os: linux + serviceAccountName: grafana-agent + volumes: + - configMap: + name: agent-config-d78c7bkcmg + name: config + - hostPath: + path: /var/log + name: varlog + - configMap: + name: agent-modules-cf8t5bf7t9 + name: agent-modules + updateStrategy: + rollingUpdate: + maxSurge: 0 + maxUnavailable: 2 + type: RollingUpdate --- apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor @@ -65466,6 +2404,27 @@ spec: --- apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +spec: + endpoints: + - honorLabels: true + port: http-metrics + scheme: http + selector: + matchLabels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor metadata: labels: app.kubernetes.io/component: mimir @@ -65500,6 +2459,42 @@ spec: --- apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor +metadata: + labels: + app.kubernetes.io/component: all + app.kubernetes.io/instance: pyroscope + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: pyroscope + app.kubernetes.io/version: 1.5.0 + helm.sh/chart: pyroscope-1.5.0 + name: pyroscope + namespace: profiles-system +spec: + endpoints: + - port: http2 + relabelings: + - action: replace + replacement: profiles-system/pyroscope + sourceLabels: + - job + targetLabel: job + scheme: http + 
namespaceSelector: + matchNames: + - profiles-system + selector: + matchExpressions: + - key: prometheus.io/service-monitor + operator: NotIn + values: + - "false" + matchLabels: + app.kubernetes.io/component: all + app.kubernetes.io/instance: pyroscope + app.kubernetes.io/name: pyroscope +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor metadata: name: tempo namespace: tracing-system @@ -65526,3 +2521,37 @@ spec: matchLabels: app.kubernetes.io/instance: tempo app.kubernetes.io/name: tempo +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +spec: + rules: + - host: grafana-agent.localhost + http: + paths: + - backend: + service: + name: grafana-agent + port: + number: 80 + path: / + pathType: Prefix + - host: agent.localhost + http: + paths: + - backend: + service: + name: grafana-agent + port: + number: 80 + path: / + pathType: Prefix diff --git a/kubernetes/monolithic-mode/all-in-one/kustomization.yaml b/kubernetes/monolithic-mode/all-in-one/kustomization.yaml index 15bfa851..f4c81ef8 100644 --- a/kubernetes/monolithic-mode/all-in-one/kustomization.yaml +++ b/kubernetes/monolithic-mode/all-in-one/kustomization.yaml @@ -9,32 +9,19 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: -- ../metrics/mimir +- ../../common/grafana-agent - ../logs/loki -- ../profiles/pyroscope - ../traces/tempo -- ../../../monitoring-mixins/agent-flow-mixin/deploy -- ../../../monitoring-mixins/go-runtime-mixin/deploy -- ../../../monitoring-mixins/loki-mixin/deploy -- ../../../monitoring-mixins/mimir-mixin/deploy -# - ../../../monitoring-mixins/tempo-mixin/deploy +- ../metrics/mimir +- ../profiles/pyroscope configMapGenerator: - name: agent-config namespace: monitoring-system - 
options: - disableNameSuffixHash: true + behavior: replace files: - configs/config.river -- name: grafana-datasources - namespace: monitoring-system - options: - labels: - grafana_datasource: "1" - files: - - datasources.yaml=configs/grafana-datasources-all-in-one.yaml - # Update LGTMP endpoint in gateway - name: nginx-templates namespace: gateway diff --git a/kubernetes/monolithic-mode/logs/configs/config.river b/kubernetes/monolithic-mode/logs/configs/config.river index f7fb6921..ffb94479 100644 --- a/kubernetes/monolithic-mode/logs/configs/config.river +++ b/kubernetes/monolithic-mode/logs/configs/config.river @@ -8,6 +8,9 @@ logging { format = "logfmt" } +/******************************************** + * Grafana LGTMP Stack Receiver Provider + ********************************************/ module.file "lgtmp" { filename = coalesce(env("AGENT_CONFIG_FOLDER"), "/etc/agent-modules") + "/lgtmp.river" @@ -34,6 +37,18 @@ module.file "logs_primary" { } } +/******************************************** + * Metrics + ********************************************/ +module.file "metrics_primary" { + filename = coalesce(env("AGENT_CONFIG_FOLDER"), "/etc/agent-modules") + "/metrics.river" + + arguments { + forward_to = [module.file.lgtmp.exports.metrics_receiver] + clustering = true + } +} + /******************************************** * Agent Integrations ********************************************/ diff --git a/kubernetes/monolithic-mode/logs/configs/grafana-datasources-loki.yaml b/kubernetes/monolithic-mode/logs/configs/grafana-datasources-loki.yaml deleted file mode 100644 index 46f535f3..00000000 --- a/kubernetes/monolithic-mode/logs/configs/grafana-datasources-loki.yaml +++ /dev/null @@ -1,17 +0,0 @@ -apiVersion: 1 - -deleteDatasources: -- name: Logs - uid: logs - -datasources: -# Loki for logs -- name: Logs - type: loki - uid: logs - access: proxy - url: http://nginx.gateway.svc.cluster.local:3100 - basicAuth: false - isDefault: true - version: 1 - editable: true diff 
--git a/kubernetes/monolithic-mode/logs/k8s-all-in-one.yaml b/kubernetes/monolithic-mode/logs/k8s-all-in-one.yaml index 74165bfb..75107f52 100644 --- a/kubernetes/monolithic-mode/logs/k8s-all-in-one.yaml +++ b/kubernetes/monolithic-mode/logs/k8s-all-in-one.yaml @@ -16,6 +16,132 @@ metadata: name: loki namespace: logging-system --- +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/version: 2.11.0 + name: mimir + namespace: monitoring-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent +rules: +- apiGroups: + - "" + - discovery.k8s.io + - networking.k8s.io + resources: + - endpoints + - endpointslices + - ingresses + - nodes + - nodes/proxy + - nodes/metrics + - pods + - services + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - pods + - pods/log + - namespaces + verbs: + - get + - list + - watch +- apiGroups: + - monitoring.grafana.com + resources: + - podlogs + verbs: + - get + - list + - watch +- apiGroups: + - monitoring.coreos.com + resources: + - prometheusrules + verbs: + - get + - list + - watch +- nonResourceURLs: + - /metrics + verbs: + - get +- apiGroups: + - monitoring.coreos.com + resources: + - podmonitors + - servicemonitors + - probes + verbs: + - get + - list + - watch +- 
apiGroups: + - "" + resources: + - events + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - configmaps + - secrets + verbs: + - get + - list + - watch +- apiGroups: + - apps + resources: + - replicasets + verbs: + - get + - list + - watch +- apiGroups: + - extensions + resources: + - replicasets + verbs: + - get + - list + - watch +--- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: @@ -39,6 +165,25 @@ rules: --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: grafana-agent +subjects: +- kind: ServiceAccount + name: grafana-agent + namespace: monitoring-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding metadata: labels: app.kubernetes.io/instance: loki @@ -172,9 +317,11 @@ data: config.river: "/*\nThe following example shows using the default all logs processing module, for\na single tenant and specifying the destination url/credentials via environment\nvariables.\n*/\nlogging {\n\tlevel = coalesce(env(\"AGENT_LOG_LEVEL\"), - \"info\")\n\tformat = \"logfmt\"\n}\n\nmodule.file \"lgtmp\" {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), - \"/etc/agent-modules\") + \"/lgtmp.river\"\n\n\targuments {\n\t\tcluster = - coalesce(env(\"CLUSTER\"), \"k3d-k3s-codelab\")\n\t\tlogs_endpoint = coalesce(env(\"LOGS_ENDPOINT\"), + \"info\")\n\tformat = \"logfmt\"\n}\n\n/********************************************\n + * Grafana LGTMP Stack Receiver Provider\n ********************************************/\nmodule.file + \"lgtmp\" {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), \"/etc/agent-modules\") + + \"/lgtmp.river\"\n\n\targuments {\n\t\tcluster = 
coalesce(env(\"CLUSTER\"), + \"k3d-k3s-codelab\")\n\t\tlogs_endpoint = coalesce(env(\"LOGS_ENDPOINT\"), \"http://nginx.gateway.svc:3100\")\n\t\tmetrics_endpoint = coalesce(env(\"METRICS_ENDPOINT\"), \"http://nginx.gateway.svc:8080\")\n\t\tprofiles_endpoint = coalesce(env(\"PROFILES_ENDPOINT\"), \"http://nginx.gateway.svc:4040\")\n\t\ttraces_endpoint = coalesce(env(\"TRACES_ENDPOINT\"), @@ -184,41 +331,636 @@ data: + \"/logs.river\"\n\n\targuments {\n\t\tforward_to = [module.file.lgtmp.exports.logs_receiver]\n\t\tgit_repo \ = \"https://github.com/qclaogui/agent-modules.git\"\n\t\tgit_rev = \"main\"\n\t\tgit_pull_freq = \"0s\"\n\t}\n}\n\n/********************************************\n - * Agent Integrations\n ********************************************/\nmodule.file - \"agent_integrations\" {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), - \"/etc/agent-modules\") + \"/integrations.river\"\n\n\targuments {\n\t\tname = - \"agent-integrations\"\n\t\tnamespace = \"monitoring-system\"\n\t\tforward_to - = [module.file.lgtmp.exports.metrics_receiver]\n\t}\n}\n" + * Metrics\n ********************************************/\nmodule.file \"metrics_primary\" + {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), \"/etc/agent-modules\") + + \"/metrics.river\"\n\n\targuments {\n\t\tforward_to = [module.file.lgtmp.exports.metrics_receiver]\n\t\tclustering + = true\n\t}\n}\n\n/********************************************\n * Agent Integrations\n + ********************************************/\nmodule.file \"agent_integrations\" + {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), \"/etc/agent-modules\") + + \"/integrations.river\"\n\n\targuments {\n\t\tname = \"agent-integrations\"\n\t\tnamespace + \ = \"monitoring-system\"\n\t\tforward_to = [module.file.lgtmp.exports.metrics_receiver]\n\t}\n}\n" kind: ConfigMap metadata: - name: agent-config + name: agent-config-6thf5hghkg namespace: monitoring-system --- apiVersion: v1 data: - datasources.yaml: | - apiVersion: 1 - - 
deleteDatasources: - - name: Logs - uid: logs - - datasources: - # Loki for logs - - name: Logs - type: loki - uid: logs - access: proxy - url: http://nginx.gateway.svc.cluster.local:3100 - basicAuth: false - isDefault: true - version: 1 - editable: true + MEMCACHED_SECRET_NAME: integrations-memcached + MYSQL_SECRET_NAME: integrations-mysql + REDIS_SECRET_NAME: integrations-redis + memcached.river: "/*\nModule: Memcached integrations\nDescription: Wrapper module + to integration Memcached metrics\n*/\nargument \"clustering\" {\n\t// comment + = \"Whether or not clustering should be enabled\"\n\toptional = true\n\tdefault + \ = true\n}\n\nargument \"name\" {\n\t// comment = \"Name of the secret for Memcached\"\n\toptional + = true\n\tdefault = \"integrations-memcached\"\n}\n\nargument \"namespace\" {\n\t// + comment = \"Namespace of the Memcached secret Integrations\"\n\toptional = true\n\tdefault + \ = \"default\"\n}\n\nargument \"instance\" {\n\t// comment = \"Instance of the + Memcached\"\n\toptional = true\n\tdefault = \"primary\"\n}\n\nargument \"forward_to\" + { }\n\nremote.kubernetes.secret \"memcached\" {\n\tname = argument.name.value\n\tnamespace + = argument.namespace.value\n}\n\n// Metrics\nprometheus.exporter.memcached \"integrations_memcached\" + {\n\taddress = nonsensitive(remote.kubernetes.secret.memcached.data[\"memcached-address\"])\n\ttimeout + = \"5s\"\n}\n\nprometheus.scrape \"memcached\" {\n\tclustering {\n\t\tenabled + = argument.clustering.value\n\t}\n\n\tenable_protobuf_negotiation = true\n\tscrape_classic_histograms + \ = true\n\n\ttargets = concat(\n\t\tprometheus.exporter.memcached.integrations_memcached.targets,\n\t)\n\tjob_name + \ = \"integrations/memcached\"\n\tforward_to = [prometheus.relabel.integrations_memcached.receiver]\n}\n\nprometheus.relabel + \"integrations_memcached\" {\n\trule {\n\t\treplacement = argument.instance.value\n\t\ttarget_label + = \"instance\"\n\t}\n\tforward_to = argument.forward_to.value\n}\n" + mysql.river: 
"/*\nModule: Mysql integrations\nDescription: Wrapper module to integration + mysql metrics\n*/\nargument \"clustering\" {\n\t// comment = \"Whether or not + clustering should be enabled\"\n\toptional = true\n\tdefault = true\n}\n\nargument + \"name\" {\n\t// comment = \"Name of the secret for MySQL\"\n\toptional = true\n\tdefault + \ = \"integrations-mysql\"\n}\n\nargument \"namespace\" {\n\t// comment = \"Namespace + of the MySQL secret Integrations\"\n\toptional = true\n\tdefault = \"default\"\n}\n\nargument + \"instance\" {\n\t// comment = \"Instance of the Database\"\n\toptional = true\n\tdefault + \ = \"primary\"\n}\n\nargument \"forward_to\" { }\n\nremote.kubernetes.secret + \"mysql\" {\n\tname = argument.name.value\n\tnamespace = argument.namespace.value\n}\n\n// + Metrics\nprometheus.exporter.mysql \"integrations_mysql\" {\n\tdata_source_name + = nonsensitive(remote.kubernetes.secret.mysql.data[\"mysql-username\"]) + \":\" + + nonsensitive(remote.kubernetes.secret.mysql.data[\"mysql-password\"]) + \"@(\" + + nonsensitive(remote.kubernetes.secret.mysql.data[\"mysql-host\"]) + \")/\"\n}\n\nprometheus.scrape + \"mysql\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\tenable_protobuf_negotiation + = true\n\tscrape_classic_histograms = true\n\n\ttargets = concat(\n\t\tprometheus.exporter.mysql.integrations_mysql.targets,\n\t)\n\tjob_name + \ = \"integrations/mysql\"\n\tforward_to = [prometheus.relabel.integrations_mysql.receiver]\n}\n\nprometheus.relabel + \"integrations_mysql\" {\n\trule {\n\t\treplacement = argument.instance.value\n\t\ttarget_label + = \"instance\"\n\t}\n\tforward_to = argument.forward_to.value\n}\n" + redis.river: "/*\nModule: Redis integrations\nDescription: Wrapper module to integration + Redis metrics\n*/\nargument \"clustering\" {\n\t// comment = \"Whether or not + clustering should be enabled\"\n\toptional = true\n\tdefault = true\n}\n\nargument + \"namespace\" {\n\t// comment = \"Namespace of the Redis secret 
Integrations\"\n\toptional + = true\n\tdefault = \"default\"\n}\n\nargument \"name\" {\n\t// comment = \"Name + of the secret for Redis\"\n\toptional = true\n\tdefault = \"integrations-redis\"\n}\n\nargument + \"instance\" {\n\t// comment = \"Instance of the Redis\"\n\toptional = true\n\tdefault + \ = \"master\"\n}\n\nargument \"forward_to\" { }\n\nremote.kubernetes.secret \"redis\" + {\n\tname = argument.name.value\n\tnamespace = argument.namespace.value\n}\n\n// + Metrics\nprometheus.exporter.redis \"integrations_redis\" {\n\tredis_addr = + nonsensitive(remote.kubernetes.secret.redis.data[\"redis-addr\"])\n\tredis_password + = nonsensitive(remote.kubernetes.secret.redis.data[\"redis-password\"])\n}\n\nprometheus.scrape + \"redis\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\tenable_protobuf_negotiation + = true\n\tscrape_classic_histograms = true\n\n\ttargets = concat(\n\t\tprometheus.exporter.redis.integrations_redis.targets,\n\t)\n\tjob_name + \ = \"integrations/redis\"\n\tforward_to = [prometheus.relabel.integrations_redis.receiver]\n}\n\nprometheus.relabel + \"integrations_redis\" {\n\trule {\n\t\treplacement = argument.instance.value\n\t\ttarget_label + = \"instance\"\n\t}\n\tforward_to = argument.forward_to.value\n}\n" +kind: ConfigMap +metadata: + name: agent-integrations + namespace: monitoring-system +--- +apiVersion: v1 +data: + integrations.river: "/*\nModule: Agent integrations\nDescription: Wrapper module + to include auto loading integrations\n*/\nargument \"name\" {\n\t// comment = + \"Name of the integrations config\"\n\toptional = true\n\tdefault = \"agent-integrations\"\n}\n\nargument + \"namespace\" {\n\t// comment = \"Namespace of the integrations config\"\n\toptional + = true\n\tdefault = \"default\"\n}\n\nargument \"forward_to\" { }\n\nremote.kubernetes.configmap + \"integrations\" {\n\tname = argument.name.value\n\tnamespace = argument.namespace.value\n}\n\n/********************************************\n + * 
Integrations Mysql\n ********************************************/\nmodule.string + \"mysql\" {\n\tcontent = remote.kubernetes.configmap.integrations.data[\"mysql.river\"]\n\n\targuments + {\n\t\tnamespace = argument.namespace.value\n\t\tname = remote.kubernetes.configmap.integrations.data[\"MYSQL_SECRET_NAME\"]\n\t\tinstance + \ = \"primary\"\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n\n/********************************************\n + * Integrations Memcached\n ********************************************/\nmodule.string + \"memcached\" {\n\tcontent = remote.kubernetes.configmap.integrations.data[\"memcached.river\"]\n\n\targuments + {\n\t\tnamespace = argument.namespace.value\n\t\tname = remote.kubernetes.configmap.integrations.data[\"MEMCACHED_SECRET_NAME\"]\n\t\tinstance + \ = \"primary\"\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n\n/********************************************\n + * Integrations Redis\n ********************************************/\nmodule.string + \"redis\" {\n\tcontent = remote.kubernetes.configmap.integrations.data[\"redis.river\"]\n\n\targuments + {\n\t\tnamespace = argument.namespace.value\n\t\tname = remote.kubernetes.configmap.integrations.data[\"REDIS_SECRET_NAME\"]\n\t\tinstance + \ = \"master\"\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n" + lgtmp.river: "/********************************************\n * ARGUMENTS\n ********************************************/\nargument + \"cluster\" {\n\toptional = true\n\tdefault = \"k3d-k3s-codelab\"\n}\n\nargument + \"tenant\" {\n\toptional = true\n\tdefault = \"anonymous\"\n}\n\nargument \"metrics_endpoint\" + {\n\toptional = true\n\tdefault = \"http://mimir:8080\"\n\t//comment = \"Where + to send collected metrics.\"\n}\n\nargument \"logs_endpoint\" {\n\toptional = + true\n\tdefault = \"http://loki:3100\"\n\t//comment = \"Where to send collected + logs.\"\n}\n\nargument \"traces_endpoint\" {\n\toptional = true\n\tdefault = + \"tempo:4317\"\n\t//comment = \"Where 
to send collected traces.\"\n}\n\nargument + \"profiles_endpoint\" {\n\toptional = true\n\tdefault = \"http://pyroscope:4040\"\n\t//comment + \ = \"Where to send collected profiles.\"\n}\n\n/********************************************\n + * EXPORTS\n ********************************************/\n\nexport \"metrics_receiver\" + {\n\tvalue = prometheus.remote_write.mimir.receiver\n}\n\nexport \"logs_receiver\" + {\n\tvalue = loki.write.loki.receiver\n}\n\nexport \"traces_receiver\" {\n\tvalue + = otelcol.exporter.otlp.tempo.input\n}\n\nexport \"profiles_receiver\" {\n\tvalue + = pyroscope.write.pyroscope.receiver\n}\n\n/********************************************\n + * Endpoints\n ********************************************/\n\n// Metrics\nprometheus.remote_write + \"mimir\" {\n\tendpoint {\n\t\turl = argument.metrics_endpoint.value + + \"/api/v1/push\"\n\t\tsend_native_histograms = true\n\t}\n\n\texternal_labels + = {\n\t\t\"scraped_by\" = \"grafana-agent\",\n\t\t\"cluster\" = argument.cluster.value,\n\t}\n}\n\n// + Logs\nloki.write \"loki\" {\n\tendpoint {\n\t\turl = argument.logs_endpoint.value + + \"/loki/api/v1/push\"\n\t\ttenant_id = argument.tenant.value\n\t}\n\n\texternal_labels + = {\n\t\t\"scraped_by\" = \"grafana-agent\",\n\t\t\"cluster\" = argument.cluster.value,\n\t}\n}\n\n// + Traces\notelcol.exporter.otlp \"tempo\" {\n\tclient {\n\t\tendpoint = argument.traces_endpoint.value\n\n\t\ttls + {\n\t\t\tinsecure = true\n\t\t\tinsecure_skip_verify = true\n\t\t}\n\t}\n}\n\n// + Profiles\npyroscope.write \"pyroscope\" {\n\tendpoint {\n\t\turl = argument.profiles_endpoint.value\n\t}\n\n\texternal_labels + = {\n\t\t\"scraped_by\" = \"grafana-agent\",\n\t\t\"cluster\" = argument.cluster.value,\n\t}\n}\n" + logs.river: "/*\nModule: logs\nDescription: Wrapper module to include all kubernetes + logging modules and use cri parsing\n*/\nargument \"forward_to\" {\n\t// comment + = \"Must be a list(LogsReceiver) where collected logs should be forwarded 
to\"\n\toptional + = false\n}\n\nargument \"tenant\" {\n\t// comment = \"The tenant to filter logs + to. This does not have to be the tenantId, this is the value to look for in the + logs.agent.grafana.com/tenant annotation, and this can be a regex.\"\n\toptional + = true\n\tdefault = \".*\"\n}\n\nargument \"keep_labels\" {\n\t// comment = \"List + of labels to keep before the log message is written to Loki\"\n\toptional = true\n\tdefault + \ = [\n\t\t\"app\",\n\t\t\"cluster\",\n\t\t\"component\",\n\t\t\"container\",\n\t\t\"deployment\",\n\t\t\"env\",\n\t\t\"filename\",\n\t\t\"instance\",\n\t\t\"job\",\n\t\t\"level\",\n\t\t\"log_type\",\n\t\t\"namespace\",\n\t\t\"region\",\n\t\t\"service\",\n\t\t\"squad\",\n\t\t\"team\",\n\t]\n}\n\nargument + \"git_repo\" {\n\toptional = true\n\tdefault = coalesce(env(\"GIT_REPO\"), \"https://github.com/grafana/agent-modules.git\")\n}\n\nargument + \"git_rev\" {\n\toptional = true\n\tdefault = coalesce(env(\"GIT_REV\"), env(\"GIT_REVISION\"), + env(\"GIT_BRANCH\"), \"main\")\n}\n\nargument \"git_pull_freq\" {\n\t// comment + = \"How often to pull the git repo, the default is 0s which means never pull\"\n\toptional + = true\n\tdefault = \"0s\"\n}\n\nmodule.git \"log_targets\" {\n\trepository = + argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/targets/logs-from-worker.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.log_formats_all.exports.process.receiver]\n\t\ttenant + \ = argument.tenant.value\n\t\tgit_repo = argument.git_repo.value\n\t\tgit_rev + \ = argument.git_rev.value\n\t\tgit_pull_freq = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git + \"log_formats_all\" {\n\trepository = argument.git_repo.value\n\trevision + \ = argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/log-formats/all.river\"\n\n\targuments + {\n\t\tforward_to = 
[module.git.log_level_default.exports.process.receiver]\n\t\tgit_repo + \ = argument.git_repo.value\n\t\tgit_rev = argument.git_rev.value\n\t\tgit_pull_freq + = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git \"log_level_default\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/labels/log-level.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.label_normalize_filename.exports.process.receiver]\n\t}\n}\n\nmodule.git + \"label_normalize_filename\" {\n\trepository = argument.git_repo.value\n\trevision + \ = argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/labels/normalize-filename.river\"\n\n\targuments + {\n\t\t// here we fork, one branch goes to the log level module, the other goes + to the metrics module\n\t\t// this is because we need to reduce the labels on + the pre-metrics but they are still necessary in\n\t\t// downstream modules\n\t\tforward_to + = [\n\t\t\tmodule.git.pre_process_metrics.exports.process.receiver,\n\t\t\tmodule.git.drop_levels.exports.process.receiver,\n\t\t]\n\t}\n}\n\nmodule.git + \"pre_process_metrics\" {\n\trepository = argument.git_repo.value\n\trevision + \ = argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/metrics/pre-process-bytes-lines.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.drop_levels.exports.process.receiver]\n\t\tkeep_labels + = argument.keep_labels.value\n\t}\n}\n\nmodule.git \"drop_levels\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/drops/levels.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.scrub_all.exports.process.receiver]\n\t\tgit_repo + \ = argument.git_repo.value\n\t\tgit_rev = 
argument.git_rev.value\n\t\tgit_pull_freq + = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git \"scrub_all\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/scrubs/all.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.embed_pod.exports.process.receiver]\n\t\tgit_repo + \ = argument.git_repo.value\n\t\tgit_rev = argument.git_rev.value\n\t\tgit_pull_freq + = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git \"embed_pod\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/embed/pod.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.mask_all.exports.process.receiver]\n\t}\n}\n\nmodule.git + \"mask_all\" {\n\trepository = argument.git_repo.value\n\trevision = + argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/masks/all.river\"\n\n\targuments {\n\t\tforward_to + \ = [module.git.label_keep.exports.process.receiver]\n\t\tgit_repo = argument.git_repo.value\n\t\tgit_rev + \ = argument.git_rev.value\n\t\tgit_pull_freq = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git + \"label_keep\" {\n\trepository = argument.git_repo.value\n\trevision = + argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/labels/keep-labels.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.post_process_metrics.exports.process.receiver]\n\t\tkeep_labels + = argument.keep_labels.value\n\t}\n}\n\nmodule.git \"post_process_metrics\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/metrics/post-process-bytes-lines.river\"\n\n\targuments + {\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n" + 
metrics.river: "/*\nModule: metrics-all\nDescription: Wrapper module to include + all kubernetes metric modules and use cri parsing\n*/\nargument \"forward_to\" + {\n\t// comment = \"Must be a list(MetricssReceiver) where collected logs should + be forwarded to\"\n\toptional = false\n}\n\nargument \"clustering\" {\n\t// comment + = \"Whether or not clustering should be enabled\"\n\toptional = true\n\tdefault + \ = false\n}\n\n/********************************************\n * Kubernetes Auto + Scrape ServiceMonitor\n ********************************************/\nprometheus.operator.servicemonitors + \"auto_scrape_servicemonitors\" {\n\tforward_to = argument.forward_to.value\n\n\tclustering + {\n\t\tenabled = argument.clustering.value\n\t}\n}\n\n/********************************************\n + * Kubernetes Auto Scrape PodMonitors\n ********************************************/\nprometheus.operator.podmonitors + \"auto_scrape_podmonitors\" {\n\tforward_to = argument.forward_to.value\n\n\tclustering + {\n\t\tenabled = argument.clustering.value\n\t}\n\n\tselector {\n\t\tmatch_expression + {\n\t\t\tkey = \"team\"\n\t\t\toperator = \"In\"\n\t\t\tvalues = [\"team-infra\"]\n\t\t}\n\t}\n}\n\n/********************************************\n + * Kubernetes Prometheus Rules To Mimir\n ********************************************/\nmimir.rules.kubernetes + \"prometheus_rules_to_mimir\" {\n\taddress = coalesce(env(\"METRICS_ENDPOINT\"), + \"http://nginx.gateway.svc:8080\")\n\ttenant_id = \"anonymous\"\n}\n" + profiles.river: "/*\nModule: profiles\nDescription: Wrapper module to include all + kubernetes profile modules and use cri parsing\n*/\nargument \"forward_to\" {\n\t// + comment = \"Must be a list(ProfilessReceiver) where collected logs should be forwarded + to\"\n\toptional = false\n}\n\nargument \"clustering\" {\n\t// comment = \"Whether + or not clustering should be enabled\"\n\toptional = true\n\tdefault = false\n}\n\ndiscovery.kubernetes + \"pyroscope_kubernetes\" 
{\n\trole = \"pod\"\n}\n\n// The default scrape config + allows to define annotations based scraping.\n//\n// For example the following + annotations:\n//\n// ```\n// profiles.grafana.com/memory.scrape: \"true\"\n// + profiles.grafana.com/memory.port: \"8080\"\n// profiles.grafana.com/cpu.scrape: + \"true\"\n// profiles.grafana.com/cpu.port: \"8080\"\n// profiles.grafana.com/goroutine.scrape: + \"true\"\n// profiles.grafana.com/goroutine.port: \"8080\"\n// ```\n//\n// will + scrape the `memory`, `cpu` and `goroutine` profiles from the `8080` port of the + pod.\n//\n// For more information see https://grafana.com/docs/phlare/latest/operators-guide/deploy-kubernetes/#optional-scrape-your-own-workloads-profiles\ndiscovery.relabel + \"kubernetes_pods\" {\n\ttargets = concat(discovery.kubernetes.pyroscope_kubernetes.targets)\n\n\trule + {\n\t\taction = \"drop\"\n\t\tsource_labels = [\"__meta_kubernetes_pod_phase\"]\n\t\tregex + \ = \"Pending|Succeeded|Failed|Completed\"\n\t}\n\n\trule {\n\t\taction + = \"labelmap\"\n\t\tregex = \"__meta_kubernetes_pod_label_(.+)\"\n\t}\n\n\trule + {\n\t\taction = \"replace\"\n\t\tsource_labels = [\"__meta_kubernetes_namespace\"]\n\t\ttarget_label + \ = \"namespace\"\n\t}\n\n\trule {\n\t\taction = \"replace\"\n\t\tsource_labels + = [\"__meta_kubernetes_pod_name\"]\n\t\ttarget_label = \"pod\"\n\t}\n\n\trule + {\n\t\taction = \"replace\"\n\t\tsource_labels = [\"__meta_kubernetes_pod_container_name\"]\n\t\ttarget_label + \ = \"container\"\n\t}\n}\n\ndiscovery.relabel \"kubernetes_pods_memory_default_name\" + {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = 
[\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_memory_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port\"]\n\t\taction + \ = 
\"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Memory\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_memory\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_memory_default_name.output, discovery.relabel.kubernetes_pods_memory_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = true\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_cpu_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port\"]\n\t\taction + \ = 
\"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_cpu_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape CPU\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_cpu\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_cpu_default_name.output, discovery.relabel.kubernetes_pods_cpu_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory 
{\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_goroutine_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_goroutine_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = 
\"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Goroutine\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_goroutine\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_goroutine_default_name.output, + discovery.relabel.kubernetes_pods_goroutine_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_block_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = 
[\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_block_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = 
[\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Block\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_block\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_block_default_name.output, discovery.relabel.kubernetes_pods_block_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_mutex_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = 
[\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_mutex_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Mutex\n 
********************************************/\npyroscope.scrape + \"pyroscope_scrape_mutex\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_mutex_default_name.output, discovery.relabel.kubernetes_pods_mutex_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_fgprof_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_fgprof_custom_name\" {\n\ttargets = 
concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Fgprof\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_fgprof\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_fgprof_default_name.output, discovery.relabel.kubernetes_pods_fgprof_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block 
{\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = true\n\t\t}\n\t}\n}\n" + traces.river: "/*\nModule: traces\n*/\n\n/********************************************\n + * ARGUMENTS\n ********************************************/\nargument \"traces_forward_to\" + {\n\toptional = false\n}\n\nargument \"logs_forward_to\" {\n\toptional = false\n}\n\nargument + \"metrics_forward_to\" {\n\toptional = false\n}\n\nargument \"cluster\" {\n\toptional + = true\n\tdefault = \"k3d-k3s-codelab\"\n}\n\nargument \"otlp_http_endpoint\" + {\n\toptional = true\n\tdefault = \"0.0.0.0:4318\"\n}\n\nargument \"otlp_grpc_endpoint\" + {\n\toptional = true\n\tdefault = \"0.0.0.0:4317\"\n}\n\n/********************************************\n + * EXPORTS\n ********************************************/\nexport \"agent_traces_input\" + {\n\tvalue = otelcol.processor.batch.default.input\n}\n\n/********************************************\n + * Jaeger for Metrics Logs Traces\n ********************************************/\n\notelcol.receiver.jaeger + \"default\" {\n\tprotocols {\n\t\tgrpc {\n\t\t\tendpoint = \"0.0.0.0:14250\"\n\t\t}\n\n\t\tthrift_http + {\n\t\t\tendpoint = \"0.0.0.0:14268\"\n\t\t}\n\n\t\tthrift_binary {\n\t\t\tendpoint + = \"0.0.0.0:6832\"\n\t\t}\n\n\t\tthrift_compact {\n\t\t\tendpoint = \"0.0.0.0:6831\"\n\t\t}\n\t}\n\n\toutput + {\n\t\tmetrics = [otelcol.processor.batch.default.input]\n\t\tlogs = [otelcol.processor.resourcedetection.default.input]\n\t\ttraces + \ = [otelcol.processor.resourcedetection.default.input]\n\t}\n}\n\n/********************************************\n + * Otelcol for Metrics Logs Traces\n ********************************************/\n// + https://grafana.com/docs/agent/latest/flow/reference/components/otelcol.receiver.otlp/\notelcol.receiver.otlp + \"default\" {\n\tgrpc {\n\t\tendpoint = argument.otlp_grpc_endpoint.value\n\t}\n\n\thttp + {\n\t\tendpoint = 
argument.otlp_http_endpoint.value\n\t}\n\n\toutput {\n\t\tmetrics + = [otelcol.processor.batch.default.input]\n\t\tlogs = [otelcol.processor.resourcedetection.default.input]\n\t\ttraces + \ = [\n\t\t\totelcol.processor.resourcedetection.default.input,\n\t\t\totelcol.connector.spanlogs.autologging.input,\n\t\t]\n\t}\n}\n\notelcol.processor.resourcedetection + \"default\" {\n\tdetectors = [\"env\"]\n\n\toutput {\n\t\tlogs = [otelcol.processor.k8sattributes.default.input]\n\t\ttraces + = [otelcol.processor.k8sattributes.default.input]\n\t}\n}\n\notelcol.processor.k8sattributes + \"default\" {\n\textract {\n\t\tmetadata = [\n\t\t\t\"k8s.namespace.name\",\n\t\t\t\"k8s.pod.name\",\n\t\t\t\"k8s.deployment.name\",\n\t\t\t\"k8s.statefulset.name\",\n\t\t\t\"k8s.daemonset.name\",\n\t\t\t\"k8s.cronjob.name\",\n\t\t\t\"k8s.job.name\",\n\t\t\t\"k8s.node.name\",\n\t\t\t\"k8s.pod.uid\",\n\t\t\t\"k8s.pod.start_time\",\n\t\t]\n\t}\n\n\tpod_association + {\n\t\tsource {\n\t\t\tfrom = \"connection\"\n\t\t}\n\t}\n\n\toutput {\n\t\tlogs + \ = [otelcol.processor.transform.add_resource_attributes.input]\n\t\ttraces = + [otelcol.processor.transform.add_resource_attributes.input]\n\t}\n}\n\notelcol.processor.transform + \"add_resource_attributes\" {\n\terror_mode = \"ignore\"\n\n\tlog_statements {\n\t\tcontext + \ = \"resource\"\n\t\tstatements = [\n\t\t\t`set(attributes[\"pod\"], attributes[\"k8s.pod.name\"])`,\n\t\t\t`set(attributes[\"namespace\"], + attributes[\"k8s.namespace.name\"])`,\n\t\t\t`set(attributes[\"loki.resource.labels\"], + \"pod, namespace, cluster, job\")`,\n\t\t\t`set(attributes[\"k8s.cluster.name\"], + \"k3d-k3s-codelab\") where attributes[\"k8s.cluster.name\"] == nil`,\n\t\t]\n\t}\n\n\ttrace_statements + {\n\t\tcontext = \"resource\"\n\t\tstatements = [\n\t\t\t`set(attributes[\"k8s.cluster.name\"], + \"k3d-k3s-codelab\") where attributes[\"k8s.cluster.name\"] == nil`,\n\t\t]\n\t}\n\n\toutput + {\n\t\tlogs = [otelcol.processor.filter.default.input]\n\t\ttraces = 
[otelcol.processor.filter.default.input]\n\t}\n}\n\notelcol.processor.filter + \"default\" {\n\terror_mode = \"ignore\"\n\n\toutput {\n\t\tlogs = [otelcol.processor.batch.default.input]\n\t\ttraces + = [otelcol.processor.batch.default.input]\n\t}\n}\n\notelcol.processor.batch \"default\" + {\n\tsend_batch_size = 16384\n\tsend_batch_max_size = 0\n\ttimeout = + \"5s\"\n\n\toutput {\n\t\tmetrics = [otelcol.processor.memory_limiter.default.input]\n\t\tlogs + \ = [otelcol.processor.memory_limiter.default.input]\n\t\ttraces = [otelcol.processor.memory_limiter.default.input]\n\t}\n}\n\notelcol.processor.memory_limiter + \"default\" {\n\tcheck_interval = \"1s\"\n\tlimit_percentage = 50\n\tspike_limit_percentage + = 30\n\n\toutput {\n\t\tmetrics = [otelcol.exporter.prometheus.tracesmetrics.input]\n\t\tlogs + \ = [otelcol.exporter.loki.traceslogs.input]\n\t\ttraces = argument.traces_forward_to.value\n\t}\n}\n\notelcol.exporter.prometheus + \"tracesmetrics\" {\n\tforward_to = argument.metrics_forward_to.value\n}\n\notelcol.exporter.loki + \"traceslogs\" {\n\tforward_to = [loki.process.traceslogs.receiver]\n}\n\n// The + OpenTelemetry spanlog connector processes incoming trace spans and extracts data + from them ready\n// for logging.\notelcol.connector.spanlogs \"autologging\" {\n\t// + We only want to output a line for each root span (ie. 
every single trace), and + not for every\n\t// process or span (outputting a line for every span would be + extremely verbose).\n\tspans = false\n\troots = true\n\tprocesses = false\n\n\t// + We want to ensure that the following three span attributes are included in the + log line, if present.\n\tspan_attributes = [\n\t\t\"http.method\",\n\t\t\"http.target\",\n\t\t\"http.status_code\",\n\t]\n\n\t// + Overrides the default key in the log line to be `traceId`, which is then used + by Grafana to\n\t// identify the trace ID for correlation with the Tempo datasource.\n\toverrides + {\n\t\ttrace_id_key = \"traceId\"\n\t}\n\n\t// Send to the OpenTelemetry Loki + exporter.\n\toutput {\n\t\tlogs = [otelcol.exporter.loki.autologging.input]\n\t}\n}\n\n// + Simply forwards the incoming OpenTelemetry log format out as a Loki log.\n// We + need this stage to ensure we can then process the logline as a Loki object.\notelcol.exporter.loki + \"autologging\" {\n\tforward_to = [loki.process.autologging.receiver]\n}\n\n// + The Loki processor allows us to accept a correctly formatted Loki log and mutate + it into\n// a set of fields for output.\nloki.process \"autologging\" {\n\t// + The JSON stage simply extracts the `body` (the actual logline) from the Loki log, + ignoring\n\t// all other fields.\n\tstage.json {\n\t\texpressions = {\"body\" + = \"\"}\n\t}\n\t// The output stage takes the body (the main logline) and uses + this as the source for the output\n\t// logline. 
In this case, it essentially + turns it into logfmt.\n\tstage.output {\n\t\tsource = \"body\"\n\t}\n\n\tforward_to + = [loki.process.traceslogs.receiver]\n}\n\nloki.process \"traceslogs\" {\n\tstage.tenant + {\n\t\tvalue = \"anonymous\"\n\t}\n\n\tforward_to = argument.logs_forward_to.value\n}\n" +kind: ConfigMap +metadata: + name: agent-modules-cf8t5bf7t9 + namespace: monitoring-system +--- +apiVersion: v1 +data: + alertmanager_fallback_config.yaml: | + route: + group_wait: 0s + receiver: empty-receiver + + receivers: + # In this example we're not going to send any notification out of Alertmanager. + - name: 'empty-receiver' + mimir.yaml: | + # Do not use this configuration in production. + # It is for demonstration purposes only. + multitenancy_enabled: false + + # -usage-stats.enabled=false + usage_stats: + enabled: false + + server: + http_listen_port: 8080 + grpc_listen_port: 9095 + log_level: info + + # https://grafana.com/docs/mimir/latest/references/configuration-parameters/#use-environment-variables-in-the-configuration + common: + storage: + backend: s3 + s3: + endpoint: ${MIMIR_S3_ENDPOINT:minio.minio-system.svc:443} + access_key_id: ${MIMIR_S3_ACCESS_KEY_ID:lgtmp} + secret_access_key: ${MIMIR_S3_SECRET_ACCESS_KEY:supersecret} + insecure: ${MIMIR_S3_INSECURE:false} + http: + insecure_skip_verify: true + + alertmanager: + data_dir: /data/alertmanager + enable_api: true + external_url: /alertmanager + fallback_config_file: /etc/mimir/alertmanager_fallback_config.yaml + alertmanager_storage: + s3: + bucket_name: mimir-alertmanager + + + memberlist: + join_members: [ mimir-memberlist:7946 ] + + ingester: + ring: + replication_factor: 1 + + store_gateway: + sharding_ring: + replication_factor: 1 + + + blocks_storage: + s3: + bucket_name: mimir-blocks + tsdb: + dir: /data/ingester + ship_interval: 1m + block_ranges_period: [ 2h ] + retention_period: 3h + bucket_store: + index_cache: + backend: memcached + memcached: + addresses: 
dns+memcached.memcached-system.svc:11211 + + chunks_cache: + backend: memcached + memcached: + addresses: dns+memcached.memcached-system.svc:11211 + + metadata_cache: + backend: memcached + memcached: + addresses: dns+memcached.memcached-system.svc:11211 + + ruler: + rule_path: /data/rules + enable_api: true + alertmanager_url: http://localhost:8080/alertmanager + ruler_storage: + s3: + bucket_name: mimir-ruler + cache: + backend: memcached + memcached: + addresses: dns+memcached.memcached-system.svc:11211 + + compactor: + compaction_interval: 30s + data_dir: /data/mimir-compactor + cleanup_interval: 1m + tenant_cleanup_delay: 1m + + limits: + native_histograms_ingestion_enabled: true + + overrides_exporter: + ring: + enabled: true + wait_stability_min_duration: 30s + + runtime_config: + file: /etc/mimir/runtime.yaml + runtime.yaml: |- + # This file can be used to set overrides or other runtime config. + ingester_limits: # limits that each ingester replica enforces + max_ingestion_rate: 20000 + max_series: 1500000 + max_tenants: 1000 + max_inflight_push_requests: 30000 + + distributor_limits: # limits that each distributor replica enforces + max_ingestion_rate: 20000 + max_inflight_push_requests: 30000 + max_inflight_push_requests_bytes: 50000000 + + overrides: + anonymous: # limits for anonymous that the whole cluster enforces + # ingestion_tenant_shard_size: 9 + max_global_series_per_user: 1500000 + max_fetched_series_per_query: 100000 + native_histograms_ingestion_enabled: true + ruler_max_rules_per_rule_group: 50 kind: ConfigMap metadata: labels: - grafana_datasource: "1" - name: grafana-datasources-9tgbk45h65 + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/version: 2.11.0 + name: mimir-config-958c4gm5k9 namespace: monitoring-system --- apiVersion: v1 @@ -231,6 +973,51 @@ metadata: type: Opaque --- apiVersion: v1 +data: + 
memcached-address: bWVtY2FjaGVkLm1lbWNhY2hlZC1zeXN0ZW0uc3ZjLmNsdXN0ZXIubG9jYWw6MTEyMTE= +kind: Secret +metadata: + name: integrations-memcached + namespace: monitoring-system +type: Opaque +--- +apiVersion: v1 +data: + mysql-host: bXlzcWwubXlzcWwtc3lzdGVtLnN2Yy5jbHVzdGVyLmxvY2Fs + mysql-password: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= + mysql-username: bGd0bXA= +kind: Secret +metadata: + name: integrations-mysql + namespace: monitoring-system +type: Opaque +--- +apiVersion: v1 +data: + redis-addr: cmVkaXMtbWFzdGVyLnJlZGlzLXN5c3RlbS5zdmMuY2x1c3Rlci5sb2NhbDo2Mzc5 + redis-password: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= +kind: Secret +metadata: + name: integrations-redis + namespace: monitoring-system +type: Opaque +--- +apiVersion: v1 +data: + MIMIR_S3_SECRET_ACCESS_KEY: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= +kind: Secret +metadata: + labels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/version: 2.11.0 + name: mimir-env-92ddctt858 + namespace: monitoring-system +type: Opaque +--- +apiVersion: v1 kind: Service metadata: labels: @@ -305,6 +1092,219 @@ spec: app.kubernetes.io/part-of: memberlist type: ClusterIP --- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +spec: + internalTrafficPolicy: Cluster + ports: + - name: http-metrics + port: 80 + protocol: TCP + targetPort: 80 + - name: grpc-otlp + port: 4317 + protocol: TCP + targetPort: 4317 + - name: http-otlp + port: 4318 + protocol: TCP + targetPort: 4318 + - name: zipkin + port: 9411 + protocol: TCP + targetPort: 9411 + - name: jaeger-compact + port: 6831 + protocol: UDP + targetPort: 6831 + selector: + 
app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + type: ClusterIP +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent-cluster + namespace: monitoring-system +spec: + clusterIP: None + ports: + - name: http + port: 80 + protocol: TCP + targetPort: 80 + - name: grpc-otlp + port: 4317 + protocol: TCP + targetPort: 4317 + - name: http-otlp + port: 4318 + protocol: TCP + targetPort: 4318 + - name: zipkin + port: 9411 + protocol: TCP + targetPort: 9411 + - name: jaeger-compact + port: 6831 + protocol: UDP + targetPort: 6831 + selector: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + type: ClusterIP +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/version: 2.11.0 + name: mimir + namespace: monitoring-system +spec: + ports: + - name: http-metrics + port: 8080 + - name: grpc-distribut + port: 9095 + selector: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/name: mimir +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/version: 2.11.0 + prometheus.io/service-monitor: "false" + name: mimir-memberlist + namespace: monitoring-system +spec: + clusterIP: None + ports: + - appProtocol: tcp + name: tcp-gossip-ring + port: 7946 + protocol: TCP + targetPort: 7946 + publishNotReadyAddresses: true + selector: + app.kubernetes.io/component: mimir + 
app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/name: mimir + app.kubernetes.io/part-of: memberlist +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/part-of: memberlist + app.kubernetes.io/version: 2.11.0 + name: mimir + namespace: monitoring-system +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/name: mimir + app.kubernetes.io/part-of: memberlist + template: + metadata: + annotations: + logs.agent.grafana.com/scrape: "true" + logs.agent.grafana.com/scrub-level: info + profiles.grafana.com/cpu.port_name: http-metrics + profiles.grafana.com/cpu.scrape: "false" + profiles.grafana.com/goroutine.port_name: http-metrics + profiles.grafana.com/goroutine.scrape: "false" + profiles.grafana.com/memory.port_name: http-metrics + profiles.grafana.com/memory.scrape: "false" + pyroscope.io/service_name: mimir + labels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/name: mimir + app.kubernetes.io/part-of: memberlist + spec: + containers: + - args: + - -target=all + - -config.expand-env=true + - -config.file=/etc/mimir/mimir.yaml + - -memberlist.bind-addr=$(POD_IP) + env: + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + envFrom: + - secretRef: + name: mimir-env-92ddctt858 + image: docker.io/grafana/mimir:2.11.0 + imagePullPolicy: IfNotPresent + name: mimir + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc-distribut + - containerPort: 7946 + name: http-memberlist + readinessProbe: + httpGet: + path: /ready + port: http-metrics + resources: + limits: + cpu: 999m + memory: 1Gi + requests: + cpu: 10m + memory: 55Mi + volumeMounts: + - 
mountPath: /etc/mimir + name: config + - mountPath: /data + name: storage + terminationGracePeriodSeconds: 60 + volumes: + - configMap: + name: mimir-config-958c4gm5k9 + name: config + - emptyDir: {} + name: storage +--- apiVersion: apps/v1 kind: StatefulSet metadata: @@ -437,6 +1437,128 @@ spec: requests: storage: 5Gi --- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +spec: + minReadySeconds: 10 + selector: + matchLabels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + template: + metadata: + annotations: + kubectl.kubernetes.io/default-container: grafana-agent + logs.agent.grafana.com/scrape: "true" + logs.agent.grafana.com/scrub-level: debug + profiles.grafana.com/cpu.port_name: http-metrics + profiles.grafana.com/cpu.scrape: "false" + profiles.grafana.com/goroutine.port_name: http-metrics + profiles.grafana.com/goroutine.scrape: "false" + profiles.grafana.com/memory.port_name: http-metrics + profiles.grafana.com/memory.scrape: "false" + pyroscope.io/service_name: grafana-agent + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + spec: + containers: + - args: + - run + - /etc/agent/config.river + - --storage.path=/tmp/agent + - --server.http.listen-addr=0.0.0.0:80 + - --server.http.ui-path-prefix=/ + - --disable-reporting + - --cluster.enabled=true + - --cluster.join-addresses=grafana-agent-cluster + env: + - name: AGENT_MODE + value: flow + - name: AGENT_DEPLOY_MODE + value: helm + - name: HOSTNAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + envFrom: + - secretRef: + name: agent-env + optional: true + image: docker.io/grafana/agent:v0.40.3 + imagePullPolicy: IfNotPresent + name: grafana-agent + ports: + - 
containerPort: 80 + name: http-metrics + - containerPort: 4317 + name: grpc-otlp + protocol: TCP + - containerPort: 4318 + name: http-otlp + protocol: TCP + - containerPort: 9411 + name: zipkin + protocol: TCP + - containerPort: 6831 + name: jaeger-compact + protocol: UDP + readinessProbe: + httpGet: + path: /-/ready + port: 80 + scheme: HTTP + initialDelaySeconds: 10 + timeoutSeconds: 1 + volumeMounts: + - mountPath: /etc/agent + name: config + - mountPath: /var/log + name: varlog + readOnly: true + - mountPath: /etc/agent-modules + name: agent-modules + - args: + - --volume-dir=/etc/agent + - --webhook-url=http://localhost:80/-/reload + image: ghcr.io/jimmidyson/configmap-reload:v0.12.0 + name: config-reloader + resources: + requests: + cpu: 1m + memory: 5Mi + volumeMounts: + - mountPath: /etc/agent + name: config + dnsPolicy: ClusterFirst + nodeSelector: + kubernetes.io/os: linux + serviceAccountName: grafana-agent + volumes: + - configMap: + name: agent-config-6thf5hghkg + name: config + - hostPath: + path: /var/log + name: varlog + - configMap: + name: agent-modules-cf8t5bf7t9 + name: agent-modules + updateStrategy: + rollingUpdate: + maxSurge: 0 + maxUnavailable: 2 + type: RollingUpdate +--- apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: @@ -465,3 +1587,92 @@ spec: matchLabels: app.kubernetes.io/instance: loki app.kubernetes.io/name: loki +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +spec: + endpoints: + - honorLabels: true + port: http-metrics + scheme: http + selector: + matchLabels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + 
app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/version: 2.11.0 + name: mimir + namespace: monitoring-system +spec: + endpoints: + - port: http-metrics + relabelings: + - replacement: monitoring-system/mimir + sourceLabels: + - job + targetLabel: job + scheme: http + namespaceSelector: + matchNames: + - monitoring-system + selector: + matchExpressions: + - key: prometheus.io/service-monitor + operator: NotIn + values: + - "false" + matchLabels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/name: mimir +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +spec: + rules: + - host: grafana-agent.localhost + http: + paths: + - backend: + service: + name: grafana-agent + port: + number: 80 + path: / + pathType: Prefix + - host: agent.localhost + http: + paths: + - backend: + service: + name: grafana-agent + port: + number: 80 + path: / + pathType: Prefix diff --git a/kubernetes/monolithic-mode/logs/kustomization.yaml b/kubernetes/monolithic-mode/logs/kustomization.yaml index 7128724c..779d178a 100644 --- a/kubernetes/monolithic-mode/logs/kustomization.yaml +++ b/kubernetes/monolithic-mode/logs/kustomization.yaml @@ -8,8 +8,12 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: +- ../../common/grafana-agent - loki +# optional +- ../metrics/mimir + # Horizontally scale up support(Monolithic mode) # https://grafana.com/docs/loki/latest/get-started/deployment-modes/#monolithic-mode replicas: @@ -19,15 +23,7 @@ replicas: configMapGenerator: - name: agent-config namespace: monitoring-system - 
options: - disableNameSuffixHash: true + behavior: replace files: - configs/config.river -- name: grafana-datasources - namespace: monitoring-system - options: - labels: - grafana_datasource: "1" - files: - - datasources.yaml=configs/grafana-datasources-loki.yaml diff --git a/kubernetes/monolithic-mode/metrics/configs/config.river b/kubernetes/monolithic-mode/metrics/configs/config.river index b6bfcd6d..b8070635 100644 --- a/kubernetes/monolithic-mode/metrics/configs/config.river +++ b/kubernetes/monolithic-mode/metrics/configs/config.river @@ -8,6 +8,9 @@ logging { format = "logfmt" } +/******************************************** + * Grafana LGTMP Stack Receiver Provider + ********************************************/ module.file "lgtmp" { filename = coalesce(env("AGENT_CONFIG_FOLDER"), "/etc/agent-modules") + "/lgtmp.river" diff --git a/kubernetes/monolithic-mode/metrics/configs/grafana-datasources-mimir.yaml b/kubernetes/monolithic-mode/metrics/configs/grafana-datasources-mimir.yaml deleted file mode 100644 index 4621df12..00000000 --- a/kubernetes/monolithic-mode/metrics/configs/grafana-datasources-mimir.yaml +++ /dev/null @@ -1,30 +0,0 @@ -apiVersion: 1 - -deleteDatasources: -- name: Metrics - uid: metrics -- name: Logs - uid: logs - -datasources: -# Mimir for metrics -- name: Metrics - type: prometheus - uid: metrics - access: proxy - url: http://nginx.gateway.svc.cluster.local:8080/prometheus - basicAuth: false - isDefault: true - version: 1 - editable: true - -# Loki for logs -- name: Logs - type: loki - uid: logs - access: proxy - url: http://nginx.gateway.svc.cluster.local:3100 - basicAuth: false - isDefault: false - version: 1 - editable: true diff --git a/kubernetes/monolithic-mode/metrics/k8s-all-in-one.yaml b/kubernetes/monolithic-mode/metrics/k8s-all-in-one.yaml index bf409ad4..c41f0fc9 100644 --- a/kubernetes/monolithic-mode/metrics/k8s-all-in-one.yaml +++ b/kubernetes/monolithic-mode/metrics/k8s-all-in-one.yaml @@ -1,5 +1,17 @@ apiVersion: v1 
kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +--- +apiVersion: v1 +kind: ServiceAccount metadata: labels: app.kubernetes.io/component: mimir @@ -10,856 +22,125 @@ metadata: name: mimir namespace: monitoring-system --- -apiVersion: v1 -data: - agent-cluster-node.json: |- - { - "annotations": { - "list": [ - { - "datasource": "$loki_datasource", - "enable": true, - "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", - "iconColor": "rgba(0, 211, 255, 1)", - "instant": false, - "name": "Deployments", - "titleFormat": "{{cluster}}/{{namespace}}" - } - ] - }, - "graphTooltip": 1, - "links": [ - { - "icon": "doc", - "targetBlank": true, - "title": "Documentation", - "tooltip": "Clustering documentation", - "type": "link", - "url": "https://grafana.com/docs/agent/latest/flow/reference/cli/run/#clustered-mode-experimental" - }, - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "title": "Node Info", - "type": "row" - }, - { - "datasource": "${datasource}", - "description": "Information about a specific cluster node.\n\n* Lamport clock time: The observed Lamport time on the specific node's clock used to provide partial ordering around gossip messages. Nodes should ideally be observing roughly the same time, meaning they are up-to-date on the cluster state. 
If a node is falling behind, it means that it has not recently processed the same number of messages and may have an outdated view of its peers.\n\n* Internal cluster state observers: The number of Observer functions that are registered to run whenever the node detects a cluster change.\n\n* Gossip health score: A health score assigned to this node by the memberlist implementation. The lower, the better.\n\n* Gossip protocol version: The protocol version used by nodes to communicate with one another. It should match across all nodes.\n", - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 1 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(cluster_node_lamport_time{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "Lamport clock time" - }, - { - "datasource": "${datasource}", - "expr": "sum(cluster_node_update_observers{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "Internal cluster state observers" - }, - { - "datasource": "${datasource}", - "expr": "sum(cluster_node_gossip_health_score{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "Gossip health score" - }, - { - "datasource": "${datasource}", - "expr": "sum(cluster_node_gossip_proto_version{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "Gossip protocol version" - } - ], - "title": "Node Info", - "transformations": [ - { - "id": "renameByRegex", - "options": { - "regex": "Value #(.*)", - "renamePattern": "$1" - } - }, - { - "id": "reduce", - "options": { } - }, - { - "id": "organize", - "options": { 
- "excludeByName": { }, - "indexByName": { }, - "renameByName": { - "Field": "Metric", - "Max": "Value" - } - } - } - ], - "type": "table" - }, - { - "datasource": "${datasource}", - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 1 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(cluster_node_gossip_received_events_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", - "instant": false, - "legendFormat": "{{event}}", - "range": true - } - ], - "title": "Gossip ops/s", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Known peers to the node (including the local node).\n", - "fieldConfig": { - "defaults": { - "unit": "suffix:peers" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 9 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(cluster_node_peers{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", - "instant": false, - "legendFormat": "__auto", - "range": true - } - ], - "title": "Known peers", - "type": "stat" - }, - { - "datasource": "${datasource}", - "description": "Known peers to the node by state (including the local node).\n", - "fieldConfig": { - "defaults": { - "unit": "suffix:nodes" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 9 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "cluster_node_peers{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}", - "instant": false, - "legendFormat": "{{state}}", - "range": true - } - ], - "title": "Peers by state", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 17 - }, - "title": "Gossip Transport", - "type": "row" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "axisCenteredZero": true - }, - "unit": "Bps" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 18 - }, - 
"targets": [ - { - "datasource": "${datasource}", - "expr": "rate(cluster_transport_rx_bytes_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", - "instant": false, - "legendFormat": "rx", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "-1 * rate(cluster_transport_tx_bytes_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", - "instant": false, - "legendFormat": "tx", - "range": true - } - ], - "title": "Transport bandwidth", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "unit": "percentunit" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 18 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "1 - (\nrate(cluster_transport_tx_packets_failed_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) /\nrate(cluster_transport_tx_packets_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n)\n", - "instant": false, - "legendFormat": "Tx success %", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "1 - (\n rate(cluster_transport_rx_packets_failed_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) /\n rate(cluster_transport_rx_packets_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n )\n", - "instant": false, - "legendFormat": "Rx success %", - "range": true - } - ], - "title": "Packet write success rate", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "The number of packets enqueued currently to be decoded or encoded and sent during communication with other nodes.\n\nThe incoming and outgoing packet queue should be as empty as possible; a growing queue means that the Agent cannot keep up with the number of messages required to have all nodes 
informed of cluster changes, and the nodes may not converge in a timely fashion.\n", - "fieldConfig": { - "defaults": { - "unit": "pkts" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 18 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "cluster_transport_tx_packet_queue_length{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}", - "instant": false, - "legendFormat": "tx queue", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "cluster_transport_rx_packet_queue_length{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}", - "instant": false, - "legendFormat": "rx queue", - "range": true - } - ], - "title": "Pending packet queue", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "axisCenteredZero": true - }, - "unit": "Bps" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 26 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(cluster_transport_stream_rx_bytes_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", - "instant": false, - "legendFormat": "rx", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "-1 * rate(cluster_transport_stream_tx_bytes_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", - "instant": false, - "legendFormat": "tx", - "range": true - } - ], - "title": "Stream bandwidth", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "unit": "percentunit" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 26 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "1 - (\n rate(cluster_transport_stream_tx_packets_failed_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) /\n 
rate(cluster_transport_stream_tx_packets_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n )\n", - "instant": false, - "legendFormat": "Tx success %", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "1 - (\n rate(cluster_transport_stream_rx_packets_failed_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) /\n rate(cluster_transport_stream_rx_packets_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n )\n", - "instant": false, - "legendFormat": "Rx success %", - "range": true - } - ], - "title": "Stream write success rate", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "The number of open connections from this node to its peers.\n\nEach node picks up a subset of its peers to continuously gossip messages around cluster status using streaming HTTP/2 connections. This panel can be used to detect networking failures that result in cluster communication being disrupted and convergence taking longer than expected or outright failing.\n", - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 26 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "cluster_transport_streams{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}", - "instant": false, - "legendFormat": "Open streams", - "range": true - } - ], - "title": "Open transport streams", - "type": "timeseries" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": "cluster", - 
"query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": "namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "instance", - "name": "instance", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n", - "refId": "instance" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - "timezone": "utc", - "title": "Grafana Agent Flow / Cluster Node", - "uid": "dd370cd333b2d9258435fb1b5a20a89b" - } -kind: ConfigMap +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin labels: - grafana_dashboard: "1" - name: agent-cluster-node.json - namespace: monitoring-system + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent +rules: +- apiGroups: + - "" + - discovery.k8s.io + - networking.k8s.io + resources: + - endpoints + - endpointslices + - ingresses + - nodes + - nodes/proxy + - nodes/metrics + - pods + - services + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - pods + - pods/log + - namespaces + verbs: + - get + - list + - 
watch +- apiGroups: + - monitoring.grafana.com + resources: + - podlogs + verbs: + - get + - list + - watch +- apiGroups: + - monitoring.coreos.com + resources: + - prometheusrules + verbs: + - get + - list + - watch +- nonResourceURLs: + - /metrics + verbs: + - get +- apiGroups: + - monitoring.coreos.com + resources: + - podmonitors + - servicemonitors + - probes + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - events + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - configmaps + - secrets + verbs: + - get + - list + - watch +- apiGroups: + - apps + resources: + - replicasets + verbs: + - get + - list + - watch +- apiGroups: + - extensions + resources: + - replicasets + verbs: + - get + - list + - watch --- -apiVersion: v1 -data: - agent-cluster-overview.json: |- - { - "annotations": { - "list": [ - { - "datasource": "$loki_datasource", - "enable": true, - "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", - "iconColor": "rgba(0, 211, 255, 1)", - "instant": false, - "name": "Deployments", - "titleFormat": "{{cluster}}/{{namespace}}" - } - ] - }, - "graphTooltip": 1, - "links": [ - { - "icon": "doc", - "targetBlank": true, - "title": "Documentation", - "tooltip": "Clustering documentation", - "type": "link", - "url": "https://grafana.com/docs/agent/latest/flow/reference/cli/run/#clustered-mode-experimental" - }, - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "${datasource}", - "gridPos": { - "h": 9, - "w": 8, - "x": 0, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "count(cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"})", - "instant": true, - "legendFormat": 
"__auto", - "range": false - } - ], - "title": "Nodes", - "type": "stat" - }, - { - "datasource": "${datasource}", - "description": "Nodes info.\n", - "fieldConfig": { - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Dashboard" - }, - "properties": [ - { - "id": "mappings", - "value": [ - { - "options": { - "1": { - "index": 0, - "text": "Link" - } - }, - "type": "value" - } - ] - }, - { - "id": "links", - "value": [ - { - "targetBlank": false, - "title": "Detail dashboard for node", - "url": "/d/dd370cd333b2d9258435fb1b5a20a89b/grafana-agent-flow-cluster-node?var-instance=${__data.fields.instance}&var-datasource=${datasource}&var-loki_datasource=${loki_datasource}&var-cluster=${cluster}&var-namespace=${namespace}" - } - ] - } - ] - } - ] - }, - "gridPos": { - "h": 9, - "w": 16, - "x": 8, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"}", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false - } - ], - "title": "Node table", - "transformations": [ - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true, - "Value": false, - "__name__": true, - "cluster": true, - "namespace": true, - "state": false - }, - "indexByName": { }, - "renameByName": { - "Value": "Dashboard", - "instance": "", - "state": "" - } - } - } - ], - "type": "table" - }, - { - "datasource": "${datasource}", - "description": "Whether the cluster state has converged.\n\nIt is normal for the cluster state to be diverged briefly as gossip events propagate. 
It is not normal for the cluster state to be diverged for a long period of time.\n\nThis will show one of the following:\n\n* Converged: Nodes are aware of all other nodes, with the correct states.\n* Not converged: A subset of nodes aren't aware of their peers, or don't have an updated view of peer states.\n", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "1": { - "color": "red", - "index": 1, - "text": "Not converged" - } - }, - "type": "value" - }, - { - "options": { - "match": "null", - "result": { - "color": "green", - "index": 0, - "text": "Converged" - } - }, - "type": "special" - } - ], - "unit": "suffix:nodes" - } - }, - "gridPos": { - "h": 9, - "w": 8, - "x": 0, - "y": 9 - }, - "options": { - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "clamp((\n sum(stddev by (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"}) != 0) or\n (sum(abs(sum without (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"})) - scalar(count(cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"})) != 0))\n ),\n 1, 1\n)\n", - "format": "time_series", - "instant": true, - "legendFormat": "__auto", - "range": false - } - ], - "title": "Convergance state", - "type": "stat" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 80, - "spanNulls": true - }, - "mappings": [ - { - "options": { - "0": { - "color": "green", - "text": "Yes" - } - }, - "type": "value" - }, - { - "options": { - "1": { - "color": "red", - "text": "No" - } - }, - "type": "value" - } - ], - "max": 1, - "noValue": 0 - } - }, - "gridPos": { - "h": 9, - "w": 16, - "x": 8, - "y": 9 - }, - "options": { - "mergeValues": true - }, - "targets": [ - { - 
"datasource": "${datasource}", - "expr": "ceil(clamp((\n sum(stddev by (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"})) or\n (sum(abs(sum without (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"})) - scalar(count(cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"}))))\n ),\n 0, 1\n))\n", - "instant": false, - "legendFormat": "Converged", - "range": true - } - ], - "title": "Convergance state timeline", - "type": "state-timeline" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": "cluster", - "query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": "namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - "timezone": "utc", - "title": "Grafana Agent Flow / Cluster Overview", - "uid": "7e07f9c975fcfc2a6e120a95f579f843" - } -kind: ConfigMap +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding metadata: - annotations: - 
grafana_dashboard_folder: /dashboards/Agent Flow Mixin labels: - grafana_dashboard: "1" - name: agent-cluster-overview.json + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: grafana-agent +subjects: +- kind: ServiceAccount + name: grafana-agent namespace: monitoring-system --- apiVersion: v1 @@ -867,9 +148,11 @@ data: config.river: "/*\nThe following example shows using the default all logs processing module, for\na single tenant and specifying the destination url/credentials via environment\nvariables.\n*/\nlogging {\n\tlevel = coalesce(env(\"AGENT_LOG_LEVEL\"), - \"info\")\n\tformat = \"logfmt\"\n}\n\nmodule.file \"lgtmp\" {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), - \"/etc/agent-modules\") + \"/lgtmp.river\"\n\n\targuments {\n\t\tcluster = - coalesce(env(\"CLUSTER\"), \"k3d-k3s-codelab\")\n\t\tlogs_endpoint = coalesce(env(\"LOGS_ENDPOINT\"), + \"info\")\n\tformat = \"logfmt\"\n}\n\n/********************************************\n + * Grafana LGTMP Stack Receiver Provider\n ********************************************/\nmodule.file + \"lgtmp\" {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), \"/etc/agent-modules\") + + \"/lgtmp.river\"\n\n\targuments {\n\t\tcluster = coalesce(env(\"CLUSTER\"), + \"k3d-k3s-codelab\")\n\t\tlogs_endpoint = coalesce(env(\"LOGS_ENDPOINT\"), \"http://nginx.gateway.svc:3100\")\n\t\tmetrics_endpoint = coalesce(env(\"METRICS_ENDPOINT\"), \"http://nginx.gateway.svc:8080\")\n\t\tprofiles_endpoint = coalesce(env(\"PROFILES_ENDPOINT\"), \"http://nginx.gateway.svc:4040\")\n\t\ttraces_endpoint = coalesce(env(\"TRACES_ENDPOINT\"), @@ -884,9333 +167,485 @@ data: \ = \"monitoring-system\"\n\t\tforward_to = [module.file.lgtmp.exports.metrics_receiver]\n\t}\n}\n" kind: ConfigMap metadata: - 
name: agent-config - namespace: monitoring-system ---- -apiVersion: v1 -data: - agent-flow-controller.json: |- - { - "annotations": { - "list": [ - { - "datasource": "$loki_datasource", - "enable": true, - "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", - "iconColor": "rgba(0, 211, 255, 1)", - "instant": false, - "name": "Deployments", - "titleFormat": "{{cluster}}/{{namespace}}" - } - ] - }, - "graphTooltip": 1, - "links": [ - { - "icon": "doc", - "targetBlank": true, - "title": "Documentation", - "tooltip": "Component controller documentation", - "type": "link", - "url": "https://grafana.com/docs/agent/latest/flow/concepts/component_controller/" - }, - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "${datasource}", - "description": "The number of Grafana Agent Flow instances whose metrics are being sent and reported.\n", - "fieldConfig": { - "defaults": { - "unit": "agents" - } - }, - "gridPos": { - "h": 4, - "w": 10, - "x": 0, - "y": 0 - }, - "options": { - "colorMode": "none", - "graphMode": "none" - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "count(agent_component_controller_evaluating{cluster=\"$cluster\", namespace=\"$namespace\"})", - "instant": false, - "legendFormat": "__auto", - "range": true - } - ], - "title": "Running agents", - "type": "stat" - }, - { - "datasource": "${datasource}", - "description": "The number of running components across all running agents.\n", - "fieldConfig": { - "defaults": { - "unit": "components" - } - }, - "gridPos": { - "h": 4, - "w": 10, - "x": 0, - "y": 4 - }, - "options": { - "colorMode": "none", - "graphMode": "none" - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": 
"sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"})", - "instant": false, - "legendFormat": "__auto", - "range": true - } - ], - "title": "Running components", - "type": "stat" - }, - { - "datasource": "${datasource}", - "description": "The percentage of components which are in a healthy state.\n", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "No components", - "unit": "percentunit" - } - }, - "gridPos": { - "h": 4, - "w": 10, - "x": 0, - "y": 8 - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "text": { - "valueSize": 80 - } - }, - "pluginVersion": "9.0.6", - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\",health_type=\"healthy\"}) /\nsum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"})\n", - "instant": false, - "legendFormat": "__auto", - "range": true - } - ], - "title": "Overall component health", - "type": "stat" - }, - { - "datasource": "${datasource}", - "description": "Breakdown of components by health across all running agents.\n\n* Healthy: components have been evaluated completely and are reporting themselves as healthy.\n* Unhealthy: Components either could not be evaluated or are reporting themselves as unhealthy.\n* Unknown: A component has been created but has not yet been started.\n* Exited: A component has exited. It will not return to the running state.\n\nMore information on a component's health state can be retrieved using\nthe Grafana Agent Flow UI.\n\nNote that components may be in a degraded state even if they report\nthemselves as healthy. 
Use component-specific dashboards and alerts\nto observe detailed information about the behavior of a component.\n", - "fieldConfig": { - "defaults": { - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Unhealthy" - }, - "properties": [ - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 1 - } - ] - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Unknown" - }, - "properties": [ - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "blue", - "value": 1 - } - ] - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Exited" - }, - "properties": [ - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 1 - } - ] - } - } - ] - } - ] - }, - "gridPos": { - "h": 12, - "w": 14, - "x": 10, - "y": 0 - }, - "options": { - "orientation": "vertical", - "showUnfilled": true - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"healthy\"}) or vector(0)", - "instant": true, - "legendFormat": "Healthy", - "range": false - }, - { - "datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"unhealthy\"}) or vector(0)", - "instant": true, - "legendFormat": "Unhealthy", - "range": false - }, - { - "datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"unknown\"}) or vector(0)", - "instant": true, - "legendFormat": "Unknown", - 
"range": false - }, - { - "datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"exited\"}) or vector(0)", - "instant": true, - "legendFormat": "Exited", - "range": false - } - ], - "title": "Components by health", - "type": "bargauge" - }, - { - "datasource": "${datasource}", - "description": "The frequency at which components get updated.\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "points", - "pointSize": 3 - }, - "unit": "ops" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 12 - }, - "options": { - "tooltip": { - "mode": "multi" - } - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (instance) (rate(agent_component_evaluation_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", - "instant": false, - "legendFormat": "__auto", - "range": true - } - ], - "title": "Component evaluation rate", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "The percentiles for how long it takes to complete component evaluations.\n\nComponent evaluations must complete for components to have the latest\narguments. 
The longer the evaluations take, the slower it will be to\nreconcile the state of components.\n\nIf evaluation is taking too long, consider sharding your components to\ndeal with smaller amounts of data and reuse data as much as possible.\n", - "fieldConfig": { - "defaults": { - "unit": "s" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "histogram_quantile(0.99, sum(rate(agent_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\nor\nhistogram_quantile(0.99, sum by (le) (rate(agent_component_evaluation_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\n", - "instant": false, - "legendFormat": "99th percentile", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "histogram_quantile(0.50, sum(rate(agent_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\nor\nhistogram_quantile(0.50, sum by (le) (rate(agent_component_evaluation_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\n", - "instant": false, - "legendFormat": "50th percentile", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "(\n histogram_sum(sum(rate(agent_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))) /\n histogram_count(sum(rate(agent_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\n)\nor\n(\n sum(rate(agent_component_evaluation_seconds_sum{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])) /\n sum(rate(agent_component_evaluation_seconds_count{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n)\n", - "instant": false, - "legendFormat": "Average", - "range": true - } - ], - "title": "Component evaluation time", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "The 
percentage of time spent evaluating 'slow' components - components that took longer than 1 minute to evaluate.\n\nIdeally, no component should take more than 1 minute to evaluate. The components displayed in this chart\nmay be a sign of a problem with the pipeline.\n", - "fieldConfig": { - "defaults": { - "unit": "percentunit" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (component_id) (rate(agent_component_evaluation_slow_seconds{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n/ scalar(sum(rate(agent_component_evaluation_seconds_sum{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))\n", - "instant": false, - "legendFormat": "{{component_id}}", - "range": true - } - ], - "title": "Slow components evaluation times", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Detailed histogram view of how long component evaluations take.\n\nThe goal is to design your config so that evaluations take as little\ntime as possible; under 100ms is a good goal.\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 22 - }, - "maxDataPoints": 30, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "scheme": "Spectral" - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 0.10000000000000001 - }, - "tooltip": { - "show": true, - "yHistogram": true - }, - "yAxis": { - "unit": "s" - } - }, - "pluginVersion": "9.0.6", - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(increase(agent_component_evaluation_seconds{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\nor ignoring (le)\nsum by (le) (increase(agent_component_evaluation_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n", - "format": "heatmap", - "instant": false, - "legendFormat": "{{le}}", - "range": true - } - ], - "title": "Component evaluation 
histogram", - "type": "heatmap" - }, - { - "datasource": "${datasource}", - "description": "Detailed histogram of how long components wait to be evaluated after their dependency is updated.\n\nThe goal is to design your config so that most of the time components do not\nqueue for long; under 10ms is a good goal.\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 22 - }, - "maxDataPoints": 30, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "scheme": "Spectral" - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 0.10000000000000001 - }, - "tooltip": { - "show": true, - "yHistogram": true - }, - "yAxis": { - "unit": "s" - } - }, - "pluginVersion": "9.0.6", - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(increase(agent_component_dependencies_wait_seconds{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\nor ignoring (le)\nsum by (le) (increase(agent_component_dependencies_wait_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n", - "format": "heatmap", - "instant": false, - "legendFormat": "{{le}}", - "range": true - } - ], - "title": "Component dependency wait histogram", - "type": "heatmap" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": "cluster", - "query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - 
"query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": "namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - "timezone": "utc", - "title": "Grafana Agent Flow / Controller", - "uid": "f861e5fef2e795edd5c4c73bee1ba769" - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin - labels: - grafana_dashboard: "1" - name: agent-flow-controller.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - agent-flow-opentelemetry.json: |- - { - "graphTooltip": 1, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "title": "Receivers for traces [otelcol.receiver]", - "type": "row" - }, - { - "datasource": "${datasource}", - "description": "Number of spans successfully pushed into the pipeline.\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(receiver_accepted_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{ pod }} / {{ transport }}", - "range": true - } - ], - "title": "Accepted spans", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Number of spans that could not be pushed into the pipeline.\n", - "fieldConfig": { - "defaults": { - 
"custom": { - "fillOpacity": 20, - "gradientMode": "hue", - "stacking": { - "mode": "normal" - } - } - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(receiver_refused_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{ pod }} / {{ transport }}", - "range": true - } - ], - "title": "Refused spans", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "The duration of inbound RPCs.\n", - "fieldConfig": { - "defaults": { - "unit": "milliseconds" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 0 - }, - "maxDataPoints": 30, - "options": { - "calculate": false, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "scale": "exponential", - "scheme": "Oranges", - "steps": 65 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1.0000000000000001e-09 - }, - "tooltip": { - "show": true, - "yHistogram": true - }, - "yAxis": { - "unit": "s" - } - }, - "pluginVersion": "9.0.6", - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (le) (increase(rpc_server_duration_milliseconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", rpc_service=\"opentelemetry.proto.collector.trace.v1.TraceService\"}[$__rate_interval]))", - "format": "heatmap", - "instant": false, - "legendFormat": "{{le}}", - "range": true - } - ], - "title": "RPC server duration (traces)", - "type": "heatmap" - }, - { - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 10 - }, - "title": "Batching [otelcol.processor.batch]", - "type": "row" - }, - { - "datasource": "${datasource}", - "description": "Number of units in the batch\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 10 - }, - "maxDataPoints": 30, - "options": { - "calculate": 
false, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "scale": "exponential", - "scheme": "Oranges", - "steps": 65 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1.0000000000000001e-09 - }, - "tooltip": { - "show": true, - "yHistogram": true - }, - "yAxis": { - "unit": "s" - } - }, - "pluginVersion": "9.0.6", - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (le) (increase(processor_batch_batch_send_size_ratio_bucket{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval]))", - "format": "heatmap", - "instant": false, - "legendFormat": "{{le}}", - "range": true - } - ], - "title": "Number of units in the batch", - "type": "heatmap" - }, - { - "datasource": "${datasource}", - "description": "Number of distinct metadata value combinations being processed\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 10 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "processor_batch_metadata_cardinality_ratio{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}\n", - "instant": false, - "legendFormat": "{{ pod }}", - "range": true - } - ], - "title": "Distinct metadata values", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Number of times the batch was sent due to a timeout trigger\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 10 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(processor_batch_timeout_trigger_send_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{ pod }}", - "range": true - } - ], - "title": "Timeout trigger", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 20 - }, - "title": "Exporters for traces [otelcol.exporter]", - "type": "row" - }, - { - 
"datasource": "${datasource}", - "description": "Number of spans successfully sent to destination.\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 20 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(exporter_sent_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{ pod }}", - "range": true - } - ], - "title": "Exported sent spans", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Number of spans in failed attempts to send to destination.\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 20 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(exporter_send_failed_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{ pod }}", - "range": true - } - ], - "title": "Exported failed spans", - "type": "timeseries" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": "cluster", - "query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": "namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "allValue": ".*", - 
"datasource": "${datasource}", - "includeAll": true, - "label": "instance", - "multi": true, - "name": "instance", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n", - "refId": "instance" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - "timezone": "utc", - "title": "Grafana Agent Flow / OpenTelemetry", - "uid": "c90e752eb8c0fce588f906b7279aceea" - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin - labels: - grafana_dashboard: "1" - name: agent-flow-opentelemetry.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - agent-flow-prometheus-remote-write.json: |- - { - "annotations": { - "list": [ - { - "datasource": "$loki_datasource", - "enable": true, - "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", - "iconColor": "rgba(0, 211, 255, 1)", - "instant": false, - "name": "Deployments", - "titleFormat": "{{cluster}}/{{namespace}}" - } - ] - }, - "graphTooltip": 1, - "links": [ - { - "icon": "doc", - "targetBlank": true, - "title": "Documentation", - "tooltip": "Component documentation", - "type": "link", - "url": "https://grafana.com/docs/agent/latest/flow/reference/components/prometheus.remote_write/" - }, - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "collapsed": false, - "datasource": "${datasource}", - 
"gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "title": "prometheus.scrape", - "type": "row" - }, - { - "datasource": "${datasource}", - "description": "Percentage of targets successfully scraped by prometheus.scrape\ncomponents.\n\nThis metric is calculated by dividing the number of targets\nsuccessfully scraped by the total number of targets scraped,\nacross all the namespaces in the selected cluster.\n\nLow success rates can indicate a problem with scrape targets,\nstale service discovery, or agent misconfiguration.\n", - "fieldConfig": { - "defaults": { - "unit": "percentunit" - } - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 1 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(up{cluster=\"$cluster\"})\n/\ncount (up{cluster=\"$cluster\"})\n", - "instant": false, - "legendFormat": "% of targets successfully scraped", - "range": true - } - ], - "title": "Scrape success rate in $cluster", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Duration of successful scrapes by prometheus.scrape components,\nacross all the namespaces in the selected cluster.\n\nThis metric should be below your configured scrape interval.\nHigh durations can indicate a problem with a scrape target or\na performance issue with the agent.\n", - "fieldConfig": { - "defaults": { - "unit": "s" - } - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 1 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "quantile(0.99, scrape_duration_seconds{cluster=\"$cluster\"})\n", - "instant": false, - "legendFormat": "p99", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "quantile(0.95, scrape_duration_seconds{cluster=\"$cluster\"})\n", - "instant": false, - "legendFormat": "p95", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "quantile(0.50, scrape_duration_seconds{cluster=\"$cluster\"})\n", - "instant": false, - "legendFormat": "p50", - "range": true - } - ], - 
"title": "Scrape duration in $cluster", - "type": "timeseries" - }, - { - "collapsed": false, - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 11 - }, - "title": "prometheus.remote_write", - "type": "row" - }, - { - "datasource": "${datasource}", - "description": "How far behind prometheus.remote_write from samples recently written\nto the WAL.\n\nEach endpoint prometheus.remote_write is configured to send metrics\nhas its own delay. The time shown here is the sum across all\nendpoints for the given component.\n\nIt is normal for the WAL delay to be within 1-3 scrape intervals. If\nthe WAL delay continues to increase beyond that amount, try\nincreasing the number of maximum shards.\n", - "fieldConfig": { - "defaults": { - "unit": "s" - } - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 0, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (instance, component_id) (\n prometheus_remote_storage_highest_timestamp_in_seconds{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\"}\n - ignoring(url, remote_name) group_right(instance)\n prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "WAL delay", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate of data containing samples and metadata sent by\nprometheus.remote_write.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 20, - "gradientMode": "hue", - "stacking": { - "mode": "normal" - } - }, - "unit": "Bps" - } - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 6, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum without (remote_name, url) (\n 
rate(prometheus_remote_storage_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval]) +\n rate(prometheus_remote_storage_metadata_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "Data write throughput", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Latency of writes to the remote system made by\nprometheus.remote_write.\n", - "fieldConfig": { - "defaults": { - "unit": "s" - } - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 12, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "histogram_quantile(0.99, sum by (le) (\n rate(prometheus_remote_storage_sent_batch_duration_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n))\n", - "instant": false, - "legendFormat": "99th percentile", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "histogram_quantile(0.50, sum by (le) (\n rate(prometheus_remote_storage_sent_batch_duration_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n))\n", - "instant": false, - "legendFormat": "50th percentile", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "sum(rate(prometheus_remote_storage_sent_batch_duration_seconds_sum{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\", component_id=~\"$component\"}[$__rate_interval])) /\nsum(rate(prometheus_remote_storage_sent_batch_duration_seconds_count{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\", component_id=~\"$component\"}[$__rate_interval]))\n", - 
"instant": false, - "legendFormat": "Average", - "range": true - } - ], - "title": "Write latency", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Total number of shards which are concurrently sending samples read\nfrom the Write-Ahead Log.\n\nShards are bound to a minimum and maximum, displayed on the graph.\nThe lowest minimum and the highest maximum across all clients is\nshown.\n\nEach client has its own set of shards, minimum shards, and maximum\nshards; filter to a specific URL to display more granular\ninformation.\n", - "fieldConfig": { - "defaults": { - "unit": "none" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Minimum" - }, - "properties": [ - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 15 - ], - "fill": "dash" - } - }, - { - "id": "custom.showPoints", - "value": "never" - }, - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": false, - "viz": false - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Maximum" - }, - "properties": [ - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 15 - ], - "fill": "dash" - } - }, - { - "id": "custom.showPoints", - "value": "never" - }, - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": false, - "viz": false - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 18, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum without (remote_name, url) (\n prometheus_remote_storage_shards{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "min (\n prometheus_remote_storage_shards_min{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}\n)\n", - "instant": 
false, - "legendFormat": "Minimum", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "max (\n prometheus_remote_storage_shards_max{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}\n)\n", - "instant": false, - "legendFormat": "Maximum", - "range": true - } - ], - "title": "Shards", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Total outgoing samples sent by prometheus.remote_write.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 20, - "gradientMode": "hue", - "stacking": { - "mode": "normal" - } - }, - "unit": "cps" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 22 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum without (url, remote_name) (\n rate(prometheus_remote_storage_samples_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "Sent samples / second", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate of samples which prometheus.remote_write could not send due to\nnon-recoverable errors.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 20, - "gradientMode": "hue", - "stacking": { - "mode": "normal" - } - }, - "unit": "cps" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 22 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum without (url,remote_name) (\n rate(prometheus_remote_storage_samples_failed_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "Failed samples / second", - 
"type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate of samples which prometheus.remote_write attempted to resend\nafter receiving a recoverable error.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 20, - "gradientMode": "hue", - "stacking": { - "mode": "normal" - } - }, - "unit": "cps" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 22 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum without (url,remote_name) (\n rate(prometheus_remote_storage_samples_retried_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "Retried samples / second", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Total number of active series across all components.\n\nAn \"active series\" is a series that prometheus.remote_write recently\nreceived a sample for. 
Active series are garbage collected whenever a\ntruncation of the WAL occurs.\n", - "fieldConfig": { - "defaults": { - "unit": "short" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 32 - }, - "options": { - "legend": { - "showLegend": false - } - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(agent_wal_storage_active_series{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"})\n", - "instant": false, - "legendFormat": "Series", - "range": true - } - ], - "title": "Active series (total)", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Total number of active series which are currently being tracked by\nprometheus.remote_write components, with separate lines for each agent instance.\n\nAn \"active series\" is a series that prometheus.remote_write recently\nreceived a sample for. Active series are garbage collected whenever a\ntruncation of the WAL occurs.\n", - "fieldConfig": { - "defaults": { - "unit": "short" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 32 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "agent_wal_storage_active_series{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id!=\"\", component_id=~\"$component\", url=~\"$url\"}\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "Active series (by instance/component)", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Total number of active series which are currently being tracked by\nprometheus.remote_write components, aggregated across all instances.\n\nAn \"active series\" is a series that prometheus.remote_write recently\nreceived a sample for. 
Active series are garbage collected whenever a\ntruncation of the WAL occurs.\n", - "fieldConfig": { - "defaults": { - "unit": "short" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 32 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (component_id) (agent_wal_storage_active_series{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id!=\"\", component_id=~\"$component\", url=~\"$url\"})\n", - "instant": false, - "legendFormat": "{{component_id}}", - "range": true - } - ], - "title": "Active series (by component)", - "type": "timeseries" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": "cluster", - "query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": "namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "allValue": ".*", - "datasource": "${datasource}", - "includeAll": true, - "label": "instance", - "multi": true, - "name": "instance", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n", - "refId": "instance" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "allValue": ".*", - "datasource": "${datasource}", - 
"includeAll": true, - "label": "component", - "multi": true, - "name": "component", - "query": { - "query": "label_values(agent_wal_samples_appended_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"prometheus\\\\.remote_write\\\\..*\"}, component_id)\n", - "refId": "component" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "allValue": ".*", - "datasource": "${datasource}", - "includeAll": true, - "label": "url", - "multi": true, - "name": "url", - "query": { - "query": "label_values(prometheus_remote_storage_sent_batch_duration_seconds_sum{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\"}, url)\n", - "refId": "url" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - "timezone": "utc", - "title": "Grafana Agent Flow / Prometheus Components", - "uid": "ee34ffa2d084547d650e1d96a26306aa" - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin - labels: - grafana_dashboard: "1" - name: agent-flow-prometheus-remote-write.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - agent-flow-resources.json: |- - { - "annotations": { - "list": [ - { - "datasource": "$loki_datasource", - "enable": true, - "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", - "iconColor": "rgba(0, 211, 255, 1)", - "instant": false, - "name": "Deployments", - "titleFormat": "{{cluster}}/{{namespace}}" - } - ] - }, - "graphTooltip": 1, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - 
"keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "${datasource}", - "description": "CPU usage of the Grafana Agent process relative to 1 CPU core.\n\nFor example, 100% means using one entire CPU core.\n", - "fieldConfig": { - "defaults": { - "unit": "percentunit" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(agent_resources_process_cpu_seconds_total{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[$__rate_interval])", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "CPU usage", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Resident memory size of the Grafana Agent process.\n", - "fieldConfig": { - "defaults": { - "unit": "decbytes" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "agent_resources_process_resident_memory_bytes{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "Memory (RSS)", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate at which the Grafana Agent process performs garbage collections.\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "points", - "pointSize": 3 - }, - "unit": "ops" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 8 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(go_gc_duration_seconds_count{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[5m])\nand on(instance)\nagent_build_info{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\n", - "instant": false, - "legendFormat": "{{instance}}", - 
"range": true - } - ], - "title": "Garbage collections", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Number of goroutines which are running in parallel. An infinitely\ngrowing number of these indicates a goroutine leak.\n", - "fieldConfig": { - "defaults": { - "unit": "none" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 8 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "go_goroutines{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\nand on(instance)\nagent_build_info{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\n", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "Goroutines", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Heap memory currently in use by the Grafana Agent process.\n", - "fieldConfig": { - "defaults": { - "unit": "decbytes" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 8 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\nand on(instance)\nagent_build_info{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\n", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "Memory (heap inuse)", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate of data received across all network interfaces for the machine\nGrafana Agent is running on.\n\nData shown here is across all running processes and not exclusive to\nthe running Grafana Agent process.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 30, - "gradientMode": "none", - "stacking": { - "mode": "normal" - } - }, - "unit": "Bps" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 16 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": 
"rate(agent_resources_machine_rx_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "Network receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate of data sent across all network interfaces for the machine\nGrafana Agent is running on.\n\nData shown here is across all running processes and not exclusive to\nthe running Grafana Agent process.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 30, - "gradientMode": "none", - "stacking": { - "mode": "normal" - } - }, - "unit": "Bps" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 16 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(agent_resources_machine_tx_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "Network send bandwidth", - "type": "timeseries" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": "cluster", - "query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": 
"namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "allValue": ".*", - "datasource": "${datasource}", - "includeAll": true, - "label": "instance", - "multi": true, - "name": "instance", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n", - "refId": "instance" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - "timezone": "utc", - "title": "Grafana Agent Flow / Resources", - "uid": "d47aae5c53be5550f8e3bc8a904ba61a" - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin - labels: - grafana_dashboard: "1" - name: agent-flow-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - go-runtime.json: |- - { - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "description": "Go runtime metrics", - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "id": 14, - "iteration": 1623758038990, - "links": [ ], - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average total bytes of memory reserved across all process instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 0 - }, - "hiddenSeries": false, - "id": 16, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": 
false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by(job)(go_memstats_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}} (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Total Reserved Memory", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average stack memory usage across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 0 - }, - "hiddenSeries": false, - "id": 24, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - 
"spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job) (go_memstats_stack_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: stack inuse (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Stack Memory Use", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average memory reservations by the runtime, not for stack or heap, across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 8 - }, - "hiddenSeries": false, - "id": 26, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(go_memstats_mspan_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{instance}}: mspan (avg)", - "refId": "B" - }, - { - "expr": "avg 
by (job)(go_memstats_mcache_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{instance}}: mcache (avg)", - "refId": "D" - }, - { - "expr": "avg by (job)(go_memstats_buck_hash_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{instance}}: buck hash (avg)", - "refId": "E" - }, - { - "expr": "avg by (job)(go_memstats_gc_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: gc (avg)", - "refId": "F" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Other Memory Reservations", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average memory reserved, and actually in use, by the heap, across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 8 - }, - "hiddenSeries": false, - "id": 12, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - 
"spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(go_memstats_heap_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: heap reserved (avg)", - "refId": "B" - }, - { - "expr": "avg by (job)(go_memstats_heap_inuse_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: heap in use (avg)", - "refId": "A" - }, - { - "expr": "avg by (job)(go_memstats_heap_alloc_bytes{job=~\"tns_app\",instance=~\".*\"})", - "interval": "", - "legendFormat": "{{job}}: heap alloc (avg)", - "refId": "C" - }, - { - "expr": "avg by (job)(go_memstats_heap_idle_bytes{job=~\"tns_app\",instance=~\".*\"})", - "interval": "", - "legendFormat": "{{job}}: heap idle (avg)", - "refId": "D" - }, - { - "expr": "avg by (job)(go_memstats_heap_released_bytes{job=~\"tns_app\",instance=~\".*\"})", - "interval": "", - "legendFormat": "{{job}}: heap released (avg)", - "refId": "E" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Heap Memory", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average allocation rate in bytes per second, across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 16 - }, - 
"hiddenSeries": false, - "id": 14, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 1, - "points": true, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(rate(go_memstats_alloc_bytes_total{job=\"$job\", instance=~\"$instance\"}[$__rate_interval]))", - "interval": "", - "legendFormat": "{{job}}: bytes malloced/s (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Allocation Rate, Bytes", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average rate of heap object allocation, across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 16 - }, - "hiddenSeries": false, - "id": 20, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - 
"alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(go_memstats_mallocs_total{job=\"$job\", instance=~\"$instance\"}[$__rate_interval])", - "interval": "", - "legendFormat": "{{job}}: obj mallocs/s (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Heap Object Allocation Rate", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average number of live memory objects across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 24 - }, - "hiddenSeries": false, - "id": 22, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": 
[ - { - "expr": "avg by(job)(go_memstats_mallocs_total{job=\"$job\", instance=~\"$instance\"} - go_memstats_frees_total{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: object count (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Number of Live Objects", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average number of goroutines across instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 24 - }, - "hiddenSeries": false, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(go_goroutines{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: goroutine count (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - 
"timeShift": null, - "title": "Goroutines", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 32 - }, - "hiddenSeries": false, - "id": 4, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(go_gc_duration_seconds{quantile=\"0\", job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: min gc time (avg)", - "refId": "A" - }, - { - "expr": "avg by (job)(go_gc_duration_seconds{quantile=\"1\", job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: max gc time (avg)", - "refId": "B" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "GC min & max duration", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - 
"xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "The number used bytes at which the runtime plans to perform the next GC, averaged across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 32 - }, - "hiddenSeries": false, - "id": 27, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(go_memstats_next_gc_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}} next gc bytes (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Next GC, Bytes", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "s", - "label": null, - 
"logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "refresh": "30s", - "schemaVersion": 30, - "style": "dark", - "tags": [ - "go-runtime" - ], - "templating": { - "list": [ - { - "current": { - "selected": false, - "text": "MONITORING", - "value": "MONITORING" - }, - "description": null, - "error": null, - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "datasource", - "options": [ ], - "query": "prometheus", - "queryValue": "", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" - }, - { - "allValue": null, - "current": { - "selected": false, - "text": "pilot", - "value": "pilot" - }, - "datasource": "$datasource", - "definition": "label_values(go_info, job)", - "description": null, - "error": null, - "hide": 0, - "includeAll": false, - "label": "job", - "multi": false, - "name": "job", - "options": [ ], - "query": { - "query": "label_values(go_info, job)", - "refId": "MONITORING-job-Variable-Query" - }, - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": "", - "current": { - "selected": false, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "definition": "label_values(go_info{job=\"$job\"}, instance)", - "description": null, - "error": null, - "hide": 0, - "includeAll": true, - "label": "instance", - "multi": true, - "name": "instance", - "options": [ ], - "query": { - "query": "label_values(go_info{job=\"$job\"}, instance)", - "refId": "MONITORING-instance-Variable-Query" - }, - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-30m", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", 
- "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Go runtime metrics", - "uid": "T4sSTLBGzgp", - "version": 1 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Go - Runtime - labels: - grafana_dashboard: "1" - name: go-runtime.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - datasources.yaml: | - apiVersion: 1 - - deleteDatasources: - - name: Metrics - uid: metrics - - name: Logs - uid: logs - - datasources: - # Mimir for metrics - - name: Metrics - type: prometheus - uid: metrics - access: proxy - url: http://nginx.gateway.svc.cluster.local:8080/prometheus - basicAuth: false - isDefault: true - version: 1 - editable: true - - # Loki for logs - - name: Logs - type: loki - uid: logs - access: proxy - url: http://nginx.gateway.svc.cluster.local:3100 - basicAuth: false - isDefault: false - version: 1 - editable: true -kind: ConfigMap -metadata: - labels: - grafana_datasource: "1" - name: grafana-datasources-45d66kgh8f + name: agent-config-9cc7gk9k2b namespace: monitoring-system --- apiVersion: v1 data: - mimir-alertmanager-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - 
"stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": 
false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - 
"thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alertmanager", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?alertmanager.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - 
"unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?alertmanager.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"alertmanager\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - 
"fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"alertmanager\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk reads", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Disk", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n 
kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(alertmanager).*\"\n }\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - 
"15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Alertmanager resources", - "uid": "a6883fb22799ac74479c7db872451092", - "version": 0 - } + MEMCACHED_SECRET_NAME: integrations-memcached + MYSQL_SECRET_NAME: integrations-mysql + REDIS_SECRET_NAME: integrations-redis + memcached.river: "/*\nModule: Memcached integrations\nDescription: Wrapper module + to integration Memcached metrics\n*/\nargument \"clustering\" {\n\t// comment + = \"Whether or not clustering should be enabled\"\n\toptional = true\n\tdefault + \ = true\n}\n\nargument \"name\" {\n\t// comment = \"Name of the secret for Memcached\"\n\toptional + = true\n\tdefault = \"integrations-memcached\"\n}\n\nargument \"namespace\" {\n\t// + comment = \"Namespace of the Memcached secret Integrations\"\n\toptional = true\n\tdefault + \ = \"default\"\n}\n\nargument \"instance\" {\n\t// comment = \"Instance of the + Memcached\"\n\toptional = true\n\tdefault = \"primary\"\n}\n\nargument \"forward_to\" + { }\n\nremote.kubernetes.secret \"memcached\" {\n\tname = argument.name.value\n\tnamespace + = argument.namespace.value\n}\n\n// Metrics\nprometheus.exporter.memcached \"integrations_memcached\" + {\n\taddress = nonsensitive(remote.kubernetes.secret.memcached.data[\"memcached-address\"])\n\ttimeout + = \"5s\"\n}\n\nprometheus.scrape \"memcached\" {\n\tclustering {\n\t\tenabled + = argument.clustering.value\n\t}\n\n\tenable_protobuf_negotiation = true\n\tscrape_classic_histograms + \ = true\n\n\ttargets = concat(\n\t\tprometheus.exporter.memcached.integrations_memcached.targets,\n\t)\n\tjob_name + \ = \"integrations/memcached\"\n\tforward_to = [prometheus.relabel.integrations_memcached.receiver]\n}\n\nprometheus.relabel + \"integrations_memcached\" {\n\trule {\n\t\treplacement = argument.instance.value\n\t\ttarget_label + = \"instance\"\n\t}\n\tforward_to = argument.forward_to.value\n}\n" + mysql.river: "/*\nModule: Mysql integrations\nDescription: 
Wrapper module to integration + mysql metrics\n*/\nargument \"clustering\" {\n\t// comment = \"Whether or not + clustering should be enabled\"\n\toptional = true\n\tdefault = true\n}\n\nargument + \"name\" {\n\t// comment = \"Name of the secret for MySQL\"\n\toptional = true\n\tdefault + \ = \"integrations-mysql\"\n}\n\nargument \"namespace\" {\n\t// comment = \"Namespace + of the MySQL secret Integrations\"\n\toptional = true\n\tdefault = \"default\"\n}\n\nargument + \"instance\" {\n\t// comment = \"Instance of the Database\"\n\toptional = true\n\tdefault + \ = \"primary\"\n}\n\nargument \"forward_to\" { }\n\nremote.kubernetes.secret + \"mysql\" {\n\tname = argument.name.value\n\tnamespace = argument.namespace.value\n}\n\n// + Metrics\nprometheus.exporter.mysql \"integrations_mysql\" {\n\tdata_source_name + = nonsensitive(remote.kubernetes.secret.mysql.data[\"mysql-username\"]) + \":\" + + nonsensitive(remote.kubernetes.secret.mysql.data[\"mysql-password\"]) + \"@(\" + + nonsensitive(remote.kubernetes.secret.mysql.data[\"mysql-host\"]) + \")/\"\n}\n\nprometheus.scrape + \"mysql\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\tenable_protobuf_negotiation + = true\n\tscrape_classic_histograms = true\n\n\ttargets = concat(\n\t\tprometheus.exporter.mysql.integrations_mysql.targets,\n\t)\n\tjob_name + \ = \"integrations/mysql\"\n\tforward_to = [prometheus.relabel.integrations_mysql.receiver]\n}\n\nprometheus.relabel + \"integrations_mysql\" {\n\trule {\n\t\treplacement = argument.instance.value\n\t\ttarget_label + = \"instance\"\n\t}\n\tforward_to = argument.forward_to.value\n}\n" + redis.river: "/*\nModule: Redis integrations\nDescription: Wrapper module to integration + Redis metrics\n*/\nargument \"clustering\" {\n\t// comment = \"Whether or not + clustering should be enabled\"\n\toptional = true\n\tdefault = true\n}\n\nargument + \"namespace\" {\n\t// comment = \"Namespace of the Redis secret Integrations\"\n\toptional + = true\n\tdefault = 
\"default\"\n}\n\nargument \"name\" {\n\t// comment = \"Name + of the secret for Redis\"\n\toptional = true\n\tdefault = \"integrations-redis\"\n}\n\nargument + \"instance\" {\n\t// comment = \"Instance of the Redis\"\n\toptional = true\n\tdefault + \ = \"master\"\n}\n\nargument \"forward_to\" { }\n\nremote.kubernetes.secret \"redis\" + {\n\tname = argument.name.value\n\tnamespace = argument.namespace.value\n}\n\n// + Metrics\nprometheus.exporter.redis \"integrations_redis\" {\n\tredis_addr = + nonsensitive(remote.kubernetes.secret.redis.data[\"redis-addr\"])\n\tredis_password + = nonsensitive(remote.kubernetes.secret.redis.data[\"redis-password\"])\n}\n\nprometheus.scrape + \"redis\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\tenable_protobuf_negotiation + = true\n\tscrape_classic_histograms = true\n\n\ttargets = concat(\n\t\tprometheus.exporter.redis.integrations_redis.targets,\n\t)\n\tjob_name + \ = \"integrations/redis\"\n\tforward_to = [prometheus.relabel.integrations_redis.receiver]\n}\n\nprometheus.relabel + \"integrations_redis\" {\n\trule {\n\t\treplacement = argument.instance.value\n\t\ttarget_label + = \"instance\"\n\t}\n\tforward_to = argument.forward_to.value\n}\n" kind: ConfigMap metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-alertmanager-resources.json + name: agent-integrations namespace: monitoring-system --- apiVersion: v1 data: - mimir-alertmanager.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - 
"collapse": false, - "height": "100px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cluster_job_pod:cortex_alertmanager_alerts:sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Total alerts", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - 
"expr": "sum(cluster_job_pod:cortex_alertmanager_silences:sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Total silences", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max(cortex_alertmanager_tenants_discovered{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Tenants", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - 
"format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "QPS", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) 
(cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alertmanager Distributor", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - 
}, - "span": 12, - "targets": [ - { - "expr": "sum(cluster_job:cortex_alertmanager_alerts_received_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n-\nsum(cluster_job:cortex_alertmanager_alerts_invalid_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(cluster_job:cortex_alertmanager_alerts_invalid_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "APS", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alerts received", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "cortex_alertmanager_dispatcher_aggregation_groups{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "per pod Active Aggregation Groups", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alerts grouping", - "titleSize": "h6" - }, 
- { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(cluster_job_integration:cortex_alertmanager_notifications_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n-\nsum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "NPS", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - 
"mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "(\nsum(cluster_job_integration:cortex_alertmanager_notifications_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}) by(integration)\n-\nsum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}) by(integration)\n) > 0\nor on () vector(0)\n", - "format": "time_series", - "legendFormat": "success - {{ integration }}", - "legendLink": null - }, - { - "expr": "sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}) by(integration)", - "format": "time_series", - "legendFormat": "failed - {{ integration }}", - "legendLink": null - } - ], - "title": "NPS by integration", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_alertmanager_notification_latency_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_alertmanager_notification_latency_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_alertmanager_notification_latency_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_alertmanager_notification_latency_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alert notifications", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 
3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Error rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, 
sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alertmanager Configuration Object Store (Alertmanager accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, 
- "unit": "ms" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Get", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", 
- "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: GetRange", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - 
"min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Upload", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - 
"group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ 
- { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (cortex_alertmanager_tenants_owned{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Per pod tenants", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (cluster_job_pod:cortex_alertmanager_alerts:sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Per pod alerts", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": 
"normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (cluster_job_pod:cortex_alertmanager_silences:sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Per pod silences", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Replication", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_alertmanager_sync_configs_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_alertmanager_sync_configs_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_alertmanager_sync_configs_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Syncs/sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(reason) (rate(cortex_alertmanager_sync_configs_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{reason}}", - "legendLink": null - } - ], - "title": "Syncs/sec (by reason)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 24, - "links": [ ], - "options": 
{ - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum (rate(cortex_alertmanager_ring_check_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "errors", - "legendLink": null - } - ], - "title": "Ring check errors/sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Tenant configuration sync", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(outcome) (rate(cortex_alertmanager_state_initial_sync_completed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "interval": "1m", - "legendFormat": "{{outcome}}", - "legendLink": null - } - ], - "title": "Initial syncs /sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - 
}, - "id": 26, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "interval": "1m", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "interval": "1m", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "interval": "1m", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Initial sync duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": 
"absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 27, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_alertmanager_state_fetch_replica_state_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_alertmanager_state_fetch_replica_state_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "interval": "1m", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_alertmanager_state_fetch_replica_state_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "interval": "1m", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Fetch state from other alertmanagers /sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Sharding initial state sync", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - 
"mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 28, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(cluster_job:cortex_alertmanager_state_replication_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n-\nsum(cluster_job:cortex_alertmanager_state_replication_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(cluster_job:cortex_alertmanager_state_replication_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Replicate state to other alertmanagers /sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": 
"byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 29, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(cluster_job:cortex_alertmanager_partial_state_merges_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n-\nsum(cluster_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(cluster_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Merge state from other alertmanagers /sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 30, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - 
"sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_alertmanager_state_persist_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_alertmanager_state_persist_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_alertmanager_state_persist_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Persist state to remote storage /sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Sharding runtime state sync", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": 
"namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Alertmanager", - "uid": "b0d38d318bbddd80476246d4930f9e55", - "version": 0 - } + integrations.river: "/*\nModule: Agent integrations\nDescription: Wrapper module + to include auto loading integrations\n*/\nargument \"name\" {\n\t// comment = + \"Name of the integrations config\"\n\toptional = true\n\tdefault = \"agent-integrations\"\n}\n\nargument + \"namespace\" {\n\t// comment = \"Namespace of the integrations config\"\n\toptional + = true\n\tdefault = \"default\"\n}\n\nargument \"forward_to\" { }\n\nremote.kubernetes.configmap + \"integrations\" {\n\tname = argument.name.value\n\tnamespace = argument.namespace.value\n}\n\n/********************************************\n + * Integrations Mysql\n ********************************************/\nmodule.string + \"mysql\" {\n\tcontent = remote.kubernetes.configmap.integrations.data[\"mysql.river\"]\n\n\targuments + {\n\t\tnamespace = argument.namespace.value\n\t\tname = remote.kubernetes.configmap.integrations.data[\"MYSQL_SECRET_NAME\"]\n\t\tinstance + \ = \"primary\"\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n\n/********************************************\n + * Integrations Memcached\n ********************************************/\nmodule.string + \"memcached\" {\n\tcontent = remote.kubernetes.configmap.integrations.data[\"memcached.river\"]\n\n\targuments + {\n\t\tnamespace = argument.namespace.value\n\t\tname = 
remote.kubernetes.configmap.integrations.data[\"MEMCACHED_SECRET_NAME\"]\n\t\tinstance + \ = \"primary\"\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n\n/********************************************\n + * Integrations Redis\n ********************************************/\nmodule.string + \"redis\" {\n\tcontent = remote.kubernetes.configmap.integrations.data[\"redis.river\"]\n\n\targuments + {\n\t\tnamespace = argument.namespace.value\n\t\tname = remote.kubernetes.configmap.integrations.data[\"REDIS_SECRET_NAME\"]\n\t\tinstance + \ = \"master\"\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n" + lgtmp.river: "/********************************************\n * ARGUMENTS\n ********************************************/\nargument + \"cluster\" {\n\toptional = true\n\tdefault = \"k3d-k3s-codelab\"\n}\n\nargument + \"tenant\" {\n\toptional = true\n\tdefault = \"anonymous\"\n}\n\nargument \"metrics_endpoint\" + {\n\toptional = true\n\tdefault = \"http://mimir:8080\"\n\t//comment = \"Where + to send collected metrics.\"\n}\n\nargument \"logs_endpoint\" {\n\toptional = + true\n\tdefault = \"http://loki:3100\"\n\t//comment = \"Where to send collected + logs.\"\n}\n\nargument \"traces_endpoint\" {\n\toptional = true\n\tdefault = + \"tempo:4317\"\n\t//comment = \"Where to send collected traces.\"\n}\n\nargument + \"profiles_endpoint\" {\n\toptional = true\n\tdefault = \"http://pyroscope:4040\"\n\t//comment + \ = \"Where to send collected profiles.\"\n}\n\n/********************************************\n + * EXPORTS\n ********************************************/\n\nexport \"metrics_receiver\" + {\n\tvalue = prometheus.remote_write.mimir.receiver\n}\n\nexport \"logs_receiver\" + {\n\tvalue = loki.write.loki.receiver\n}\n\nexport \"traces_receiver\" {\n\tvalue + = otelcol.exporter.otlp.tempo.input\n}\n\nexport \"profiles_receiver\" {\n\tvalue + = pyroscope.write.pyroscope.receiver\n}\n\n/********************************************\n + * Endpoints\n 
********************************************/\n\n// Metrics\nprometheus.remote_write + \"mimir\" {\n\tendpoint {\n\t\turl = argument.metrics_endpoint.value + + \"/api/v1/push\"\n\t\tsend_native_histograms = true\n\t}\n\n\texternal_labels + = {\n\t\t\"scraped_by\" = \"grafana-agent\",\n\t\t\"cluster\" = argument.cluster.value,\n\t}\n}\n\n// + Logs\nloki.write \"loki\" {\n\tendpoint {\n\t\turl = argument.logs_endpoint.value + + \"/loki/api/v1/push\"\n\t\ttenant_id = argument.tenant.value\n\t}\n\n\texternal_labels + = {\n\t\t\"scraped_by\" = \"grafana-agent\",\n\t\t\"cluster\" = argument.cluster.value,\n\t}\n}\n\n// + Traces\notelcol.exporter.otlp \"tempo\" {\n\tclient {\n\t\tendpoint = argument.traces_endpoint.value\n\n\t\ttls + {\n\t\t\tinsecure = true\n\t\t\tinsecure_skip_verify = true\n\t\t}\n\t}\n}\n\n// + Profiles\npyroscope.write \"pyroscope\" {\n\tendpoint {\n\t\turl = argument.profiles_endpoint.value\n\t}\n\n\texternal_labels + = {\n\t\t\"scraped_by\" = \"grafana-agent\",\n\t\t\"cluster\" = argument.cluster.value,\n\t}\n}\n" + logs.river: "/*\nModule: logs\nDescription: Wrapper module to include all kubernetes + logging modules and use cri parsing\n*/\nargument \"forward_to\" {\n\t// comment + = \"Must be a list(LogsReceiver) where collected logs should be forwarded to\"\n\toptional + = false\n}\n\nargument \"tenant\" {\n\t// comment = \"The tenant to filter logs + to. 
This does not have to be the tenantId, this is the value to look for in the + logs.agent.grafana.com/tenant annotation, and this can be a regex.\"\n\toptional + = true\n\tdefault = \".*\"\n}\n\nargument \"keep_labels\" {\n\t// comment = \"List + of labels to keep before the log message is written to Loki\"\n\toptional = true\n\tdefault + \ = [\n\t\t\"app\",\n\t\t\"cluster\",\n\t\t\"component\",\n\t\t\"container\",\n\t\t\"deployment\",\n\t\t\"env\",\n\t\t\"filename\",\n\t\t\"instance\",\n\t\t\"job\",\n\t\t\"level\",\n\t\t\"log_type\",\n\t\t\"namespace\",\n\t\t\"region\",\n\t\t\"service\",\n\t\t\"squad\",\n\t\t\"team\",\n\t]\n}\n\nargument + \"git_repo\" {\n\toptional = true\n\tdefault = coalesce(env(\"GIT_REPO\"), \"https://github.com/grafana/agent-modules.git\")\n}\n\nargument + \"git_rev\" {\n\toptional = true\n\tdefault = coalesce(env(\"GIT_REV\"), env(\"GIT_REVISION\"), + env(\"GIT_BRANCH\"), \"main\")\n}\n\nargument \"git_pull_freq\" {\n\t// comment + = \"How often to pull the git repo, the default is 0s which means never pull\"\n\toptional + = true\n\tdefault = \"0s\"\n}\n\nmodule.git \"log_targets\" {\n\trepository = + argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/targets/logs-from-worker.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.log_formats_all.exports.process.receiver]\n\t\ttenant + \ = argument.tenant.value\n\t\tgit_repo = argument.git_repo.value\n\t\tgit_rev + \ = argument.git_rev.value\n\t\tgit_pull_freq = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git + \"log_formats_all\" {\n\trepository = argument.git_repo.value\n\trevision + \ = argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/log-formats/all.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.log_level_default.exports.process.receiver]\n\t\tgit_repo + \ = argument.git_repo.value\n\t\tgit_rev = 
argument.git_rev.value\n\t\tgit_pull_freq + = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git \"log_level_default\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/labels/log-level.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.label_normalize_filename.exports.process.receiver]\n\t}\n}\n\nmodule.git + \"label_normalize_filename\" {\n\trepository = argument.git_repo.value\n\trevision + \ = argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/labels/normalize-filename.river\"\n\n\targuments + {\n\t\t// here we fork, one branch goes to the log level module, the other goes + to the metrics module\n\t\t// this is because we need to reduce the labels on + the pre-metrics but they are still necessary in\n\t\t// downstream modules\n\t\tforward_to + = [\n\t\t\tmodule.git.pre_process_metrics.exports.process.receiver,\n\t\t\tmodule.git.drop_levels.exports.process.receiver,\n\t\t]\n\t}\n}\n\nmodule.git + \"pre_process_metrics\" {\n\trepository = argument.git_repo.value\n\trevision + \ = argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/metrics/pre-process-bytes-lines.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.drop_levels.exports.process.receiver]\n\t\tkeep_labels + = argument.keep_labels.value\n\t}\n}\n\nmodule.git \"drop_levels\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/drops/levels.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.scrub_all.exports.process.receiver]\n\t\tgit_repo + \ = argument.git_repo.value\n\t\tgit_rev = argument.git_rev.value\n\t\tgit_pull_freq + = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git \"scrub_all\" {\n\trepository + \ = 
argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/scrubs/all.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.embed_pod.exports.process.receiver]\n\t\tgit_repo + \ = argument.git_repo.value\n\t\tgit_rev = argument.git_rev.value\n\t\tgit_pull_freq + = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git \"embed_pod\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/embed/pod.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.mask_all.exports.process.receiver]\n\t}\n}\n\nmodule.git + \"mask_all\" {\n\trepository = argument.git_repo.value\n\trevision = + argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/masks/all.river\"\n\n\targuments {\n\t\tforward_to + \ = [module.git.label_keep.exports.process.receiver]\n\t\tgit_repo = argument.git_repo.value\n\t\tgit_rev + \ = argument.git_rev.value\n\t\tgit_pull_freq = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git + \"label_keep\" {\n\trepository = argument.git_repo.value\n\trevision = + argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/labels/keep-labels.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.post_process_metrics.exports.process.receiver]\n\t\tkeep_labels + = argument.keep_labels.value\n\t}\n}\n\nmodule.git \"post_process_metrics\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/metrics/post-process-bytes-lines.river\"\n\n\targuments + {\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n" + metrics.river: "/*\nModule: metrics-all\nDescription: Wrapper module to include + all kubernetes metric modules and use cri parsing\n*/\nargument 
\"forward_to\" + {\n\t// comment = \"Must be a list(MetricsReceiver) where collected metrics should + be forwarded to\"\n\toptional = false\n}\n\nargument \"clustering\" {\n\t// comment + = \"Whether or not clustering should be enabled\"\n\toptional = true\n\tdefault + \ = false\n}\n\n/********************************************\n * Kubernetes Auto + Scrape ServiceMonitor\n ********************************************/\nprometheus.operator.servicemonitors + \"auto_scrape_servicemonitors\" {\n\tforward_to = argument.forward_to.value\n\n\tclustering + {\n\t\tenabled = argument.clustering.value\n\t}\n}\n\n/********************************************\n + * Kubernetes Auto Scrape PodMonitors\n ********************************************/\nprometheus.operator.podmonitors + \"auto_scrape_podmonitors\" {\n\tforward_to = argument.forward_to.value\n\n\tclustering + {\n\t\tenabled = argument.clustering.value\n\t}\n\n\tselector {\n\t\tmatch_expression + {\n\t\t\tkey = \"team\"\n\t\t\toperator = \"In\"\n\t\t\tvalues = [\"team-infra\"]\n\t\t}\n\t}\n}\n\n/********************************************\n + * Kubernetes Prometheus Rules To Mimir\n ********************************************/\nmimir.rules.kubernetes + \"prometheus_rules_to_mimir\" {\n\taddress = coalesce(env(\"METRICS_ENDPOINT\"), + \"http://nginx.gateway.svc:8080\")\n\ttenant_id = \"anonymous\"\n}\n" + profiles.river: "/*\nModule: profiles\nDescription: Wrapper module to include all + kubernetes profile modules and use cri parsing\n*/\nargument \"forward_to\" {\n\t// + comment = \"Must be a list(ProfilesReceiver) where collected profiles should be forwarded + to\"\n\toptional = false\n}\n\nargument \"clustering\" {\n\t// comment = \"Whether + or not clustering should be enabled\"\n\toptional = true\n\tdefault = false\n}\n\ndiscovery.kubernetes + \"pyroscope_kubernetes\" {\n\trole = \"pod\"\n}\n\n// The default scrape config + allows defining annotation-based scraping.\n//\n// For example the following + 
annotations:\n//\n// ```\n// profiles.grafana.com/memory.scrape: \"true\"\n// + profiles.grafana.com/memory.port: \"8080\"\n// profiles.grafana.com/cpu.scrape: + \"true\"\n// profiles.grafana.com/cpu.port: \"8080\"\n// profiles.grafana.com/goroutine.scrape: + \"true\"\n// profiles.grafana.com/goroutine.port: \"8080\"\n// ```\n//\n// will + scrape the `memory`, `cpu` and `goroutine` profiles from the `8080` port of the + pod.\n//\n// For more information see https://grafana.com/docs/phlare/latest/operators-guide/deploy-kubernetes/#optional-scrape-your-own-workloads-profiles\ndiscovery.relabel + \"kubernetes_pods\" {\n\ttargets = concat(discovery.kubernetes.pyroscope_kubernetes.targets)\n\n\trule + {\n\t\taction = \"drop\"\n\t\tsource_labels = [\"__meta_kubernetes_pod_phase\"]\n\t\tregex + \ = \"Pending|Succeeded|Failed|Completed\"\n\t}\n\n\trule {\n\t\taction + = \"labelmap\"\n\t\tregex = \"__meta_kubernetes_pod_label_(.+)\"\n\t}\n\n\trule + {\n\t\taction = \"replace\"\n\t\tsource_labels = [\"__meta_kubernetes_namespace\"]\n\t\ttarget_label + \ = \"namespace\"\n\t}\n\n\trule {\n\t\taction = \"replace\"\n\t\tsource_labels + = [\"__meta_kubernetes_pod_name\"]\n\t\ttarget_label = \"pod\"\n\t}\n\n\trule + {\n\t\taction = \"replace\"\n\t\tsource_labels = [\"__meta_kubernetes_pod_container_name\"]\n\t\ttarget_label + \ = \"container\"\n\t}\n}\n\ndiscovery.relabel \"kubernetes_pods_memory_default_name\" + {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = 
\"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_memory_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = 
\"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Memory\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_memory\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_memory_default_name.output, discovery.relabel.kubernetes_pods_memory_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = true\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_cpu_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = 
\"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_cpu_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape CPU\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_cpu\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_cpu_default_name.output, discovery.relabel.kubernetes_pods_cpu_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.goroutine + 
{\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_goroutine_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_goroutine_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = 
\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Goroutine\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_goroutine\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_goroutine_default_name.output, + discovery.relabel.kubernetes_pods_goroutine_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_block_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule 
{\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_block_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + 
\ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Block\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_block\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_block_default_name.output, discovery.relabel.kubernetes_pods_block_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_mutex_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = 
\"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_mutex_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Mutex\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_mutex\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = 
concat(discovery.relabel.kubernetes_pods_mutex_default_name.output, discovery.relabel.kubernetes_pods_mutex_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_fgprof_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_fgprof_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scrape\"]\n\t\taction + \ = 
\"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Fgprof\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_fgprof\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_fgprof_default_name.output, discovery.relabel.kubernetes_pods_fgprof_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = true\n\t\t}\n\t}\n}\n" + traces.river: "/*\nModule: 
traces\n*/\n\n/********************************************\n + * ARGUMENTS\n ********************************************/\nargument \"traces_forward_to\" + {\n\toptional = false\n}\n\nargument \"logs_forward_to\" {\n\toptional = false\n}\n\nargument + \"metrics_forward_to\" {\n\toptional = false\n}\n\nargument \"cluster\" {\n\toptional + = true\n\tdefault = \"k3d-k3s-codelab\"\n}\n\nargument \"otlp_http_endpoint\" + {\n\toptional = true\n\tdefault = \"0.0.0.0:4318\"\n}\n\nargument \"otlp_grpc_endpoint\" + {\n\toptional = true\n\tdefault = \"0.0.0.0:4317\"\n}\n\n/********************************************\n + * EXPORTS\n ********************************************/\nexport \"agent_traces_input\" + {\n\tvalue = otelcol.processor.batch.default.input\n}\n\n/********************************************\n + * Jaeger for Metrics Logs Traces\n ********************************************/\n\notelcol.receiver.jaeger + \"default\" {\n\tprotocols {\n\t\tgrpc {\n\t\t\tendpoint = \"0.0.0.0:14250\"\n\t\t}\n\n\t\tthrift_http + {\n\t\t\tendpoint = \"0.0.0.0:14268\"\n\t\t}\n\n\t\tthrift_binary {\n\t\t\tendpoint + = \"0.0.0.0:6832\"\n\t\t}\n\n\t\tthrift_compact {\n\t\t\tendpoint = \"0.0.0.0:6831\"\n\t\t}\n\t}\n\n\toutput + {\n\t\tmetrics = [otelcol.processor.batch.default.input]\n\t\tlogs = [otelcol.processor.resourcedetection.default.input]\n\t\ttraces + \ = [otelcol.processor.resourcedetection.default.input]\n\t}\n}\n\n/********************************************\n + * Otelcol for Metrics Logs Traces\n ********************************************/\n// + https://grafana.com/docs/agent/latest/flow/reference/components/otelcol.receiver.otlp/\notelcol.receiver.otlp + \"default\" {\n\tgrpc {\n\t\tendpoint = argument.otlp_grpc_endpoint.value\n\t}\n\n\thttp + {\n\t\tendpoint = argument.otlp_http_endpoint.value\n\t}\n\n\toutput {\n\t\tmetrics + = [otelcol.processor.batch.default.input]\n\t\tlogs = [otelcol.processor.resourcedetection.default.input]\n\t\ttraces + \ = 
[\n\t\t\totelcol.processor.resourcedetection.default.input,\n\t\t\totelcol.connector.spanlogs.autologging.input,\n\t\t]\n\t}\n}\n\notelcol.processor.resourcedetection + \"default\" {\n\tdetectors = [\"env\"]\n\n\toutput {\n\t\tlogs = [otelcol.processor.k8sattributes.default.input]\n\t\ttraces + = [otelcol.processor.k8sattributes.default.input]\n\t}\n}\n\notelcol.processor.k8sattributes + \"default\" {\n\textract {\n\t\tmetadata = [\n\t\t\t\"k8s.namespace.name\",\n\t\t\t\"k8s.pod.name\",\n\t\t\t\"k8s.deployment.name\",\n\t\t\t\"k8s.statefulset.name\",\n\t\t\t\"k8s.daemonset.name\",\n\t\t\t\"k8s.cronjob.name\",\n\t\t\t\"k8s.job.name\",\n\t\t\t\"k8s.node.name\",\n\t\t\t\"k8s.pod.uid\",\n\t\t\t\"k8s.pod.start_time\",\n\t\t]\n\t}\n\n\tpod_association + {\n\t\tsource {\n\t\t\tfrom = \"connection\"\n\t\t}\n\t}\n\n\toutput {\n\t\tlogs + \ = [otelcol.processor.transform.add_resource_attributes.input]\n\t\ttraces = + [otelcol.processor.transform.add_resource_attributes.input]\n\t}\n}\n\notelcol.processor.transform + \"add_resource_attributes\" {\n\terror_mode = \"ignore\"\n\n\tlog_statements {\n\t\tcontext + \ = \"resource\"\n\t\tstatements = [\n\t\t\t`set(attributes[\"pod\"], attributes[\"k8s.pod.name\"])`,\n\t\t\t`set(attributes[\"namespace\"], + attributes[\"k8s.namespace.name\"])`,\n\t\t\t`set(attributes[\"loki.resource.labels\"], + \"pod, namespace, cluster, job\")`,\n\t\t\t`set(attributes[\"k8s.cluster.name\"], + \"k3d-k3s-codelab\") where attributes[\"k8s.cluster.name\"] == nil`,\n\t\t]\n\t}\n\n\ttrace_statements + {\n\t\tcontext = \"resource\"\n\t\tstatements = [\n\t\t\t`set(attributes[\"k8s.cluster.name\"], + \"k3d-k3s-codelab\") where attributes[\"k8s.cluster.name\"] == nil`,\n\t\t]\n\t}\n\n\toutput + {\n\t\tlogs = [otelcol.processor.filter.default.input]\n\t\ttraces = [otelcol.processor.filter.default.input]\n\t}\n}\n\notelcol.processor.filter + \"default\" {\n\terror_mode = \"ignore\"\n\n\toutput {\n\t\tlogs = [otelcol.processor.batch.default.input]\n\t\ttraces + 
= [otelcol.processor.batch.default.input]\n\t}\n}\n\notelcol.processor.batch \"default\" + {\n\tsend_batch_size = 16384\n\tsend_batch_max_size = 0\n\ttimeout = + \"5s\"\n\n\toutput {\n\t\tmetrics = [otelcol.processor.memory_limiter.default.input]\n\t\tlogs + \ = [otelcol.processor.memory_limiter.default.input]\n\t\ttraces = [otelcol.processor.memory_limiter.default.input]\n\t}\n}\n\notelcol.processor.memory_limiter + \"default\" {\n\tcheck_interval = \"1s\"\n\tlimit_percentage = 50\n\tspike_limit_percentage + = 30\n\n\toutput {\n\t\tmetrics = [otelcol.exporter.prometheus.tracesmetrics.input]\n\t\tlogs + \ = [otelcol.exporter.loki.traceslogs.input]\n\t\ttraces = argument.traces_forward_to.value\n\t}\n}\n\notelcol.exporter.prometheus + \"tracesmetrics\" {\n\tforward_to = argument.metrics_forward_to.value\n}\n\notelcol.exporter.loki + \"traceslogs\" {\n\tforward_to = [loki.process.traceslogs.receiver]\n}\n\n// The + OpenTelemetry spanlog connector processes incoming trace spans and extracts data + from them ready\n// for logging.\notelcol.connector.spanlogs \"autologging\" {\n\t// + We only want to output a line for each root span (ie. 
every single trace), and + not for every\n\t// process or span (outputting a line for every span would be + extremely verbose).\n\tspans = false\n\troots = true\n\tprocesses = false\n\n\t// + We want to ensure that the following three span attributes are included in the + log line, if present.\n\tspan_attributes = [\n\t\t\"http.method\",\n\t\t\"http.target\",\n\t\t\"http.status_code\",\n\t]\n\n\t// + Overrides the default key in the log line to be `traceId`, which is then used + by Grafana to\n\t// identify the trace ID for correlation with the Tempo datasource.\n\toverrides + {\n\t\ttrace_id_key = \"traceId\"\n\t}\n\n\t// Send to the OpenTelemetry Loki + exporter.\n\toutput {\n\t\tlogs = [otelcol.exporter.loki.autologging.input]\n\t}\n}\n\n// + Simply forwards the incoming OpenTelemetry log format out as a Loki log.\n// We + need this stage to ensure we can then process the logline as a Loki object.\notelcol.exporter.loki + \"autologging\" {\n\tforward_to = [loki.process.autologging.receiver]\n}\n\n// + The Loki processor allows us to accept a correctly formatted Loki log and mutate + it into\n// a set of fields for output.\nloki.process \"autologging\" {\n\t// + The JSON stage simply extracts the `body` (the actual logline) from the Loki log, + ignoring\n\t// all other fields.\n\tstage.json {\n\t\texpressions = {\"body\" + = \"\"}\n\t}\n\t// The output stage takes the body (the main logline) and uses + this as the source for the output\n\t// logline. 
In this case, it essentially + turns it into logfmt.\n\tstage.output {\n\t\tsource = \"body\"\n\t}\n\n\tforward_to + = [loki.process.traceslogs.receiver]\n}\n\nloki.process \"traceslogs\" {\n\tstage.tenant + {\n\t\tvalue = \"anonymous\"\n\t}\n\n\tforward_to = argument.logs_forward_to.value\n}\n" kind: ConfigMap metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-alertmanager.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-compactor-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 
- }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - 
"repeatRowId": null, - "showTitle": true, - "title": "CPU and memory", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory 
(RSS)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - 
"repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?compactor.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?compactor.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, 
- "title": "Network", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"compactor\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n 
device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"compactor\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk reads", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(compactor).*\"\n }\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Disk", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - 
"type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Compactor resources", - "uid": "09a5c49e9cdb2f2b24c6d184574a07fd", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-compactor-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-compactor.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - 
"targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Per-instance runs\nNumber of times a compactor instance triggers a compaction across all tenants that it manages.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "bars", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "completed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "started" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#34CCEB", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_compactor_runs_started_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "started", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_compactor_runs_completed_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "completed", - "legendLink": null - }, - { - "expr": 
"sum(rate(cortex_compactor_runs_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Per-instance runs / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Tenants compaction progress\nIn a multi-tenant cluster, display the progress of tenants that are compacted while compaction is running.\n\n", - "fieldConfig": { - "defaults": { - "max": 1, - "noValue": 1, - "unit": "percentunit" - } - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "(\n cortex_compactor_tenants_processing_succeeded{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"} +\n cortex_compactor_tenants_processing_failed{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"} +\n cortex_compactor_tenants_skipped{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}\n)\n/\ncortex_compactor_tenants_discovered{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"} > 0\n", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Tenants compaction progress", - "type": "timeseries" - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Longest time since last successful run\nDisplays the amount of time since the most recent successful execution\nof the compactor.\nThe value shown will be for the compactor replica that has the longest time since its\nlast successful run.\nThe table to the right shows a summary for all compactor replicas.\n\nIf there is no time value, one of the following messages 
might appear:\n\n- If you see \"No compactor data\" in this panel, that means that no compactors are active yet.\n\n- If you see \"No successful runs\" in this panel, that means that compactors are active, but none\n of them were successfully executed yet.\n\nThese might be expected - for example, if you just recently restarted your compactors,\nthey might not have had a chance to complete their first compaction run.\nHowever, if these messages persist, you should check the health of your compactors.\n\n", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "No compactor data", - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Last run" - }, - "properties": [ - { - "id": "custom.width", - "value": 74 - }, - { - "id": "mappings", - "value": [ - { - "options": { - "from": "-Infinity", - "result": { - "color": "text", - "text": "No successful runs since startup yet" - }, - "to": 0 - }, - "type": "range" - } - ] - }, - { - "id": "color", - "value": { - "mode": "thresholds" - } - }, - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "yellow", - "value": 7200 - }, - { - "color": "orange", - "value": 21600 - }, - { - "color": "red", - "value": 43200 - } - ] - } - } - ] - } - ] - }, - "fill": 1, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "reduceOptions": { - "calcs": [ - "first" - ], - "fields": "/^Last run$/", - "values": false - }, - "textMode": "value" - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": 
false, - "targets": [ - { - "expr": "max by(pod)\n(\n (time() * (max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[1h]) !=bool 0))\n -\n max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[1h])\n)\n", - "format": "table", - "instant": true, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Longest time since last successful run", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transformations": [ - { - "id": "organize", - "options": { - "renameByName": { - "Value": "Last run", - "pod": "Compactor" - } - } - }, - { - "id": "sortBy", - "options": { - "sort": [ - { - "desc": true, - "field": "Last run" - } - ] - } - } - ], - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Last successful run per-compactor replica\nDisplays the compactor replicas, and for each, shows how long it has been since\nits last successful compaction run.\n\nThe value in the status column is based on how long it has been since the last successful compaction.\n\n- Okay: less than 2 hours\n- Delayed: more than 2 hours\n- Late: more than 6 hours\n- Very late: more than 12 hours\n\nIf the status of any compactor replicas are *Late* or *Very late*, check their health.\n\n", - "fieldConfig": { - "overrides": [ - { - "matcher": { - "id": "byName", - "options": 
"Status" - }, - "properties": [ - { - "id": "custom.displayMode", - "value": "color-background" - }, - { - "id": "mappings", - "value": [ - { - "options": { - "from": "-Infinity", - "result": { - "color": "transparent", - "text": "N/A" - }, - "to": 0 - }, - "type": "range" - }, - { - "options": { - "from": 0, - "result": { - "color": "green", - "text": "Ok" - }, - "to": 7200 - }, - "type": "range" - }, - { - "options": { - "from": 7200, - "result": { - "color": "yellow", - "text": "Delayed" - }, - "to": 21600 - }, - "type": "range" - }, - { - "options": { - "from": 21600, - "result": { - "color": "orange", - "text": "Late" - }, - "to": 43200 - }, - "type": "range" - }, - { - "options": { - "from": 43200, - "result": { - "color": "red", - "text": "Very late" - }, - "to": "Infinity" - }, - "type": "range" - }, - { - "options": { - "match": "null+nan", - "result": { - "color": "transparent", - "text": "Unknown" - } - }, - "type": "special" - } - ] - }, - { - "id": "custom.width", - "value": 86 - }, - { - "id": "custom.align", - "value": "center" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Last run" - }, - "properties": [ - { - "id": "unit", - "value": "s" - }, - { - "id": "custom.width", - "value": 74 - }, - { - "id": "mappings", - "value": [ - { - "options": { - "from": "-Infinity", - "result": { - "text": "Never" - }, - "to": 0 - }, - "type": "range" - } - ] - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "max by(pod)\n(\n (time() * (max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[1h]) !=bool 0))\n -\n max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[1h])\n)\n", - 
"format": "table", - "instant": true, - "legendFormat": "Last run", - "legendLink": null - } - ], - "title": "Last successful run per-compactor replica", - "transformations": [ - { - "id": "organize", - "options": { - "renameByName": { - "Value": "Last run", - "pod": "Compactor" - } - } - }, - { - "id": "sortBy", - "options": { - "sort": [ - { - "desc": true, - "field": "Last run" - } - ] - } - }, - { - "id": "calculateField", - "options": { - "alias": "One", - "binary": { - "left": "Last run", - "operator": "/", - "right": "Last run" - }, - "mode": "binary", - "replaceFields": false - } - }, - { - "id": "calculateField", - "options": { - "alias": "Status", - "binary": { - "left": "Last run", - "operator": "*", - "right": "One" - }, - "mode": "binary", - "replaceFields": false - } - }, - { - "id": "filterFieldsByName", - "options": { - "include": { - "names": [ - "Compactor", - "Last run", - "Status" - ] - } - } - } - ], - "type": "table" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Summary", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Estimated Compaction Jobs\nEstimated number of compaction jobs based on latest version of bucket index. Ingesters upload new blocks every 2 hours (shortly after 01:00 UTC, 03:00 UTC, 05:00 UTC, etc.),\nand compactors should process all of them within 2h interval. If this graph regularly goes to zero (or close to zero) in 2 hour intervals, then compaction works as designed.\n\nMetric with number of compaction jobs is computed from blocks in bucket index, which is updated regularly. Metric doesn't change between bucket index updates, even if\nthere were compaction jobs finished in this time. When computing compaction jobs, only jobs that can be executed at given moment are counted. There can be more\njobs, but if they are blocked, they are not counted in the metric. 
For example if there is a split compaction job pending for some time range, no merge job\ncovering the same time range can run. In this case only split compaction job is counted toward the metric, but merge job isn't.\n\nIn other words, computed number of compaction jobs is the minimum number of compaction jobs based on latest version of bucket index.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(cortex_bucket_index_estimated_compaction_jobs{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}) and (sum(rate(cortex_bucket_index_estimated_compaction_jobs_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) == 0)", - "format": "time_series", - "legendFormat": "Jobs", - "legendLink": null - } - ], - "title": "Estimated Compaction Jobs", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### TSDB compactions / sec\nRate of TSDB compactions. 
Single TSDB compaction takes one or more input blocks and produces one or more (during \"split\" phase) output blocks.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(prometheus_tsdb_compactions_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "compactions", - "legendLink": null - } - ], - "title": "TSDB compactions / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### TSDB compaction duration\nDisplay the amount of time that it has taken to run a single TSDB compaction.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(prometheus_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": 
"A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(prometheus_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(prometheus_tsdb_compaction_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(prometheus_tsdb_compaction_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "TSDB compaction duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "avg(max by(user) (cortex_bucket_blocks_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}))", - "format": "time_series", - 
"legendFormat": "avg", - "legendLink": null - } - ], - "title": "Average blocks / tenant", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Tenants with largest number of blocks\nThe 10 tenants with the largest number of blocks.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "topk(10, max by(user) (cortex_bucket_blocks_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "Tenants with largest number of blocks", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_compactor_blocks_marked_for_deletion_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "blocks", - "legendLink": null - } - ], - "title": "Blocks marked for deletion / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_compactor_blocks_cleaned_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_compactor_block_cleanup_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Blocks deletions / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Garbage collector", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - 
"datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_compactor_meta_syncs_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_compactor_meta_sync_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_compactor_meta_sync_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Metadata syncs / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": 
[ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_compactor_meta_sync_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_compactor_meta_sync_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_compactor_meta_sync_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_compactor_meta_sync_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Metadata sync duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Metadata sync", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - 
"showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Error rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - 
"legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "nullPointMode": "null as zero", - 
"options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Object Store", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": 
"none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Get", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 
0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: GetRange", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - 
"min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Upload", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, 
- "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 21, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - 
"drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": 
"single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval])) * 1e3 / 
sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Key-value store for compactors ring", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", 
- "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Compactor", - "uid": "1b3443aea86db629e6efdb7d05c53823", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-compactor.json + name: agent-modules-cf8t5bf7t9 namespace: monitoring-system --- apiVersion: v1 @@ -10356,33187 +791,126 @@ metadata: --- apiVersion: v1 data: - mimir-config.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "instances" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "count(cortex_config_hash{cluster=~\"$cluster\", namespace=~\"$namespace\"}) by (sha256)", - "format": "time_series", - "legendFormat": "sha256:{{sha256}}", - "legendLink": 
null - } - ], - "title": "Startup config file hashes", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Startup config file", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "instances" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "count(cortex_runtime_config_hash{cluster=~\"$cluster\", namespace=~\"$namespace\"}) by (sha256)", - "format": "time_series", - "legendFormat": "sha256:{{sha256}}", - "legendLink": null - } - ], - "title": "Runtime config file hashes", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Runtime config file", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - 
"tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Config", - "uid": "5d9d0b4724c0f80d68467088ec61e003", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-config.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-object-store.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - 
"mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(component) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{component}}", - "legendLink": null - } - ], - "title": "RPS / component", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(component) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) / sum by(component) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{component}}", - "legendLink": null - } - ], - "title": "Error rate / component", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Components", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 3, - 
"links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "RPS / operation", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Error rate / operation", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Operations", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - 
"span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: Get", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, 
sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: GetRange", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", 
namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, 
sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", 
namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: Upload", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - 
"legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { 
- "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Object Store", - "uid": "e1324ee2a434f4158c00a9ee279d3292", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-object-store.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-overrides.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "${datasource}", - "id": 1, - "span": 12, - "targets": [ - { - "expr": "max by(limit_name) (cortex_limits_defaults{cluster=~\"$cluster\",namespace=~\"$namespace\"})", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "title": "Defaults", - "transformations": [ - { - "id": "labelsToFields", - "options": { } - }, - { - 
"id": "merge", - "options": { } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true - }, - "indexByName": { - "Value": 1, - "limit_name": 0 - } - } - }, - { - "id": "sortBy", - "options": { - "fields": { }, - "sort": [ - { - "field": "limit_name" - } - ] - } - } - ], - "type": "table" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "${datasource}", - "id": 2, - "span": 12, - "targets": [ - { - "expr": "max by(user, limit_name) (cortex_limits_overrides{cluster=~\"$cluster\",namespace=~\"$namespace\",user=~\"${tenant_id}\"})", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "title": "Per-tenant overrides", - "transformations": [ - { - "id": "labelsToFields", - "options": { - "mode": "columns", - "valueLabel": "limit_name" - } - }, - { - "id": "merge", - "options": { } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true - }, - "indexByName": { - "user": 0 - } - } - } - ], - "type": "table" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", 
- "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "current": { - "selected": true, - "text": ".*", - "value": ".*" - }, - "hide": 0, - "label": "Tenant ID", - "name": "tenant_id", - "options": [ - { - "selected": true, - "text": ".*", - "value": ".*" - } - ], - "query": ".*", - "type": "textbox" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Overrides", - "uid": "1e2c358600ac53f09faea133f811b5bb", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-overrides.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-overview-networking.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": 
"$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - 
"showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", - "format": "time_series", - "legendFormat": "highest", - 
"legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Writes", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - 
} - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ 
] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Reads", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Backend", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": 
"Mimir / Overview networking", - "uid": "e15c71d372cc541367a088f10d9fcd92", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-overview-networking.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-overview-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - 
"stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Writes", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - 
"min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"distributor|ingester|mimir-write\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"distributor|ingester|mimir-write\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - 
"title": "Disk reads", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(distributor|ingester|mimir-write).*\"\n }\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - 
"expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Reads", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max 
by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Backend", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - 
"tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk reads", - "type": 
"timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"\n }\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": 
"label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Overview resources", - "uid": "a9b92d3c4d1af325d872a9e9a7083d71", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-overview-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-overview.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "content": "The 'Status' panel shows an overview on the cluster health over the time.\nTo investigate failures, see a specific 
dashboard:\n\n- Writes\n- Reads\n- Rule evaluations\n- Alerting notifications\n- Object storage\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 3, - "title": "", - "transparent": true, - "type": "text" - }, - { - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#7EB26D", - "value": null - }, - { - "color": "#EAB839", - "value": 0.01 - }, - { - "color": "#E24D42", - "value": 0.050000000000000003 - } - ] - } - } - }, - "id": 2, - "options": { - "showValue": "never" - }, - "span": 6, - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "exemplar": false, - "expr": "(\n # gRPC errors are not tracked as 5xx but \"error\".\n sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"5.*|error\"}[$__rate_interval]))\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", - "instant": false, - "legendFormat": "Writes", - "range": true - }, - { - "datasource": { - "uid": "$datasource" - }, - "exemplar": false, - "expr": "(\n sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"5.*\"}[$__rate_interval]))\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", 
route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", - "instant": false, - "legendFormat": "Reads", - "range": true - }, - { - "datasource": { - "uid": "$datasource" - }, - "exemplar": false, - "expr": "(\n (\n sum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n +\n # Consider missed evaluations as failures.\n sum(rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n )\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "instant": false, - "legendFormat": "Rule evaluations", - "range": true - }, - { - "datasource": { - "uid": "$datasource" - }, - "exemplar": false, - "expr": "(\n # Failed notifications from ruler to Alertmanager (handling the case the ruler metrics are missing).\n ((sum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n) or vector(0))\n +\n # Failed notifications from Alertmanager to receivers (handling the case the alertmanager metrics are missing).\n ((sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n) or vector(0))\n)\n/\n(\n # Total notifications from ruler to Alertmanager (handling the case the ruler metrics are missing).\n ((sum(rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n) or vector(0))\n +\n # Total notifications from Alertmanager to receivers (handling the case the alertmanager metrics 
are missing).\n ((sum(cluster_job_integration:cortex_alertmanager_notifications_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n) or vector(0))\n)\n", - "instant": false, - "legendFormat": "Alerting notifications", - "range": true - }, - { - "datasource": { - "uid": "$datasource" - }, - "exemplar": false, - "expr": "sum(rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n/\nsum(rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n", - "instant": false, - "legendFormat": "Object storage", - "range": true - } - ], - "title": "Status", - "type": "state-timeline" - }, - { - "id": 3, - "options": { - "alertInstanceLabelFilter": "cluster=~\"$cluster\", namespace=~\"$namespace\"", - "alertName": "Mimir", - "dashboardAlerts": false, - "maxItems": 100, - "sortOrder": 3, - "stateFilter": { - "error": true, - "firing": true, - "noData": false, - "normal": false, - "pending": false - } - }, - "span": 3, - "title": "Firing alerts", - "type": "alertlist" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Mimir cluster health", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "content": "These panels show an overview on the write path. 
\nTo examine the write path in detail, see a specific dashboard:\n\n- Writes\n- Writes resources\n- Writes networking\n- Overview resources\n- Overview networking\n", - "datasource": null, - "description": "", - "id": 4, - "mode": "markdown", - "span": 3, - "title": "", - "transparent": true, - "type": "text" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Write requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th 
percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Write latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "cps" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_samples:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "samples / sec", - "legendLink": null - }, - { - "expr": 
"sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "exemplars / sec", - "legendLink": null - } - ], - "title": "Ingestion / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Writes", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "content": "These panels show an overview on the read path. \nTo examine the read path in detail, see a specific dashboard:\n\n- Reads\n- Reads resources\n- Reads networking\n- Overview resources\n- Overview networking\n- Queries\n- Compactor\n", - "datasource": null, - "description": "", - "id": 8, - "mode": "markdown", - "span": 3, - "title": "", - "transparent": true, - "type": "text" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - 
}, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Read requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 10, - 
"links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Read latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, 
- "seriesOverrides": [ - { - "alias": "instant queries", - "color": "#429D48" - }, - { - "alias": "range queries", - "color": "#F1C731" - }, - { - "alias": "\"label names\" queries", - "color": "#2A66CF" - }, - { - "alias": "\"label values\" queries", - "color": "#9E44C1" - }, - { - "alias": "series queries", - "color": "#FFAB57" - }, - { - "alias": "remote read queries", - "color": "#C79424" - }, - { - "alias": "metadata queries", - "color": "#84D586" - }, - { - "alias": "exemplar queries", - "color": "#A1C4FC" - }, - { - "alias": "\"active series\" queries", - "color": "#C788DE" - } - ], - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_query\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "instant queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_query_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "range queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_labels\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "\"label names\" queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_label_name_values\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "\"label values\" queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_series\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "series queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_read\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "remote read queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_metadata\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "metadata queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_query_exemplars\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "exemplar queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=\"prometheus_api_v1_cardinality_active_series\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "\"active series\" queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=\"prometheus_api_v1_cardinality_label_names\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "\"label name cardinality\" queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=\"prometheus_api_v1_cardinality_label_values\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "\"label value cardinality\" queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_.*\",route!~\".*(query|query_range|label.*|series|read|metadata|query_exemplars|cardinality_.*)\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "other", - "legendLink": null - } - ], - "title": "Queries / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Reads", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "content": "These panels show an overview on the recording and alerting rules evaluation.\nTo examine the rules evaluation and alerts notifications in detail, see a specific dashboard:\n\n- Ruler\n- Alertmanager\n- Alertmanager resources\n- Overview resources\n- Overview networking\n", - "datasource": null, - "description": "", - "id": 12, - "mode": "markdown", - "span": 3, - "title": "", - "transparent": true, - "type": "text" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - 
"id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "success", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "missed", - "legendLink": null - } - ], - "title": "Rule evaluations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum (rate(cortex_prometheus_rule_evaluation_duration_seconds_sum{cluster=~\"$cluster\", 
job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum (rate(cortex_prometheus_rule_evaluation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "average", - "legendLink": null - } - ], - "title": "Rule evaluations latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n -\nsum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "failed", - 
"legendLink": null - } - ], - "title": "Alerting notifications sent to Alertmanager / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Recording and alerting rules", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "content": "These panels show an overview on the long-term storage (object storage).\nTo examine the storage in detail, see a specific dashboard:\n\n- Object store\n- Compactor\n", - "datasource": null, - "description": "", - "id": 16, - "mode": "markdown", - "span": 3, - "title": "", - "transparent": true, - "type": "text" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n-\nsum(rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": 
"sum(rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "seriesOverrides": [ - { - "alias": "attributes", - "color": "#429D48" - }, - { - "alias": "delete", - "color": "#F1C731" - }, - { - "alias": "exists", - "color": "#2A66CF" - }, - { - "alias": "get", - "color": "#9E44C1" - }, - { - "alias": "get_range", - "color": "#FFAB57" - }, - { - "alias": "iter", - "color": "#C79424" - }, - { - "alias": "upload", - "color": "#84D586" - } - ], - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - 
"tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(max by(user) (max_over_time(cortex_bucket_blocks_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[15m])))", - "format": "time_series", - "legendFormat": "blocks", - "legendLink": null - } - ], - "title": "Total number of blocks in the storage", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Long-term storage (object storage)", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", 
- "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Overview", - "uid": "ffcd83628d7d4b5a03d1cafd159e6c9c", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-overview.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-queries.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, 
sum(rate(cortex_query_frontend_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_query_frontend_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_frontend_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Queue duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_query_frontend_retries_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, 
sum(rate(cortex_query_frontend_retries_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_query_frontend_retries_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_query_frontend_retries_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Retries", - "type": "timeseries", - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (cortex_query_frontend_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Queue length (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - 
"showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(user) (cortex_query_frontend_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}) > 0", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "Queue length (per user)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Queue duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Queue length (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": 
"A", - "mode": "none" - } - }, - "min": 0, - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(user) (cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}) > 0", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "Queue length (per user)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Intervals per query\nThe average number of split queries (partitioned by time) executed a single input query.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_frontend_split_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) / sum(rate(cortex_frontend_query_range_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", method=\"split_by_interval_and_results_cache\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": 
"splitting rate", - "legendLink": null - } - ], - "title": "Intervals per query", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "# Query the new metric introduced in Mimir 2.10.\n(\n sum by(request_type) (rate(cortex_frontend_query_result_cache_hits_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))\n /\n sum by(request_type) (rate(cortex_frontend_query_result_cache_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))\n)\n# Otherwise fallback to the previous general-purpose metrics.\nor\n(\n label_replace(\n # Query metrics before and after dskit cache refactor.\n sum (\n rate(thanos_cache_memcached_hits_total{name=\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_hits_total{name=\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n )\n /\n sum (\n rate(thanos_cache_memcached_requests_total{name=~\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_requests_total{name=~\"frontend-cache\", cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n ),\n \"request_type\", \"query_range\", \"\", \"\")\n)\n", - "format": "time_series", - "legendFormat": "{{request_type}}", - "legendLink": null - } - ], - "title": "Query results cache hit ratio", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Query results cache skipped\nThe % of queries whose results could not be cached.\nIt is tracked for each split query when the splitting by interval is enabled.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_frontend_query_result_cache_skipped_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (reason) /\nignoring (reason) group_left sum(rate(cortex_frontend_query_result_cache_attempted_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "{{reason}}", - "legendLink": null - } - ], - "title": "Query results cache skipped", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend - query splitting and results cache", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Sharded queries ratio\nThe % of queries that 
have been successfully rewritten and executed in a shardable way.\nThis panel only takes into account the type of queries that are supported by query sharding (eg. range queries).\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_frontend_query_sharding_rewrites_succeeded_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) /\nsum(rate(cortex_frontend_query_sharding_rewrites_attempted_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "sharded queries ratio", - "legendLink": null - } - ], - "title": "Sharded queries ratio", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Number of sharded queries per query\nThe number of sharded queries that have been executed for a single input query. 
It only tracks queries that\nhave been successfully rewritten in a shardable way.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_frontend_sharded_queries_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_frontend_sharded_queries_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_frontend_sharded_queries_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_frontend_sharded_queries_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Number of sharded queries per query", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": 
false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend - query sharding", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:cortex_ingester_queried_series_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job:cortex_ingester_queried_series_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1 * sum(cluster_job:cortex_ingester_queried_series_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}) / sum(cluster_job:cortex_ingester_queried_series_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Series per query", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - 
"fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:cortex_ingester_queried_samples_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job:cortex_ingester_queried_samples_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1 * sum(cluster_job:cortex_ingester_queried_samples_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}) / sum(cluster_job:cortex_ingester_queried_samples_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Samples per query", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "nullPointMode": "null as zero", - "options": 
{ - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:cortex_ingester_queried_exemplars_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job:cortex_ingester_queried_exemplars_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1 * sum(cluster_job:cortex_ingester_queried_exemplars_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}) / sum(cluster_job:cortex_ingester_queried_exemplars_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Exemplars per query", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - 
"expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_querier_storegateway_instances_hit_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_instances_hit_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Number of store-gateways hit per query", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": 
"histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_querier_storegateway_refetches_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_refetches_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Refetches of missing blocks per query", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Consistency checks failed\nRate of queries that had to run with consistency checks and those checks failed. 
A failed consistency check means that some of at least one block which had to be queried wasn't present in any of the store-gateways.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Failure Rate" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_querier_blocks_consistency_checks_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) / sum(rate(cortex_querier_blocks_consistency_checks_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Failure Rate", - "legendLink": null - } - ], - "title": "Consistency checks failed", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Rejected queries\nThe proportion of all queries received by queriers that were rejected for some reason.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - 
"tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_querier_queries_rejected_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) / ignoring (reason) group_left sum(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_query(_range)?\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{reason}}", - "legendLink": null - } - ], - "title": "Rejected queries", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max(cortex_bucket_index_loaded{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"})", - "format": "time_series", - "legendFormat": "Max", - "legendLink": null - }, - { - "expr": "min(cortex_bucket_index_loaded{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"})", - "format": "time_series", - "legendFormat": "Min", - "legendLink": null - }, - { - "expr": "avg(cortex_bucket_index_loaded{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"})", - "format": 
"time_series", - "legendFormat": "Average", - "legendLink": null - } - ], - "title": "Bucket indexes loaded (per querier)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_bucket_index_loads_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) - sum(rate(cortex_bucket_index_load_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_bucket_index_load_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Bucket indexes load / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - 
"spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_bucket_index_load_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_bucket_index_load_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_bucket_index_load_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_bucket_index_load_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Bucket indexes load latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": 
{ - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_bucket_store_series_blocks_queried_sum{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "blocks", - "legendLink": null - } - ], - "title": "Blocks queried / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "binBps" - }, - "overrides": [ ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(data_type) (\n # Exclude \"chunks refetched\".\n rate(cortex_bucket_store_series_data_size_fetched_bytes_sum{component=\"store-gateway\", stage!=\"refetched\", cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "{{data_type}}", - "legendLink": null - } - ], - "title": "Data fetched / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 
100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "binBps" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(data_type) (\n # Exclude \"chunks processed\" to only count \"chunks returned\", other than postings and series.\n rate(cortex_bucket_store_series_data_size_touched_bytes_sum{component=\"store-gateway\", stage!=\"processed\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "{{data_type}}", - "legendLink": null - } - ], - "title": "Data touched / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Store-gateway", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 26, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(stage) (rate(cortex_bucket_store_series_request_stage_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum by(stage) 
(rate(cortex_bucket_store_series_request_stage_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "{{stage}}", - "legendLink": null - } - ], - "title": "Series request average latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 27, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by(stage, le) (rate(cortex_bucket_store_series_request_stage_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])))\n", - "format": "time_series", - "legendFormat": "{{stage}}", - "legendLink": null - } - ], - "title": "Series request 99th percentile latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Series batch preloading efficiency\nThis panel shows the % of time reduced by preloading, for Series() requests which have been\nsplit to 2+ batches. 
If a Series() request is served within a single batch, then preloading\nis not triggered, and thus not counted in this measurement.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 28, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "# Clamping min to 0 because if preloading not useful at all, then the actual value we get is\n# slightly negative because of the small overhead introduced by preloading.\nclamp_min(1 - (\n sum(rate(cortex_bucket_store_series_batch_preloading_wait_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\n sum(rate(cortex_bucket_store_series_batch_preloading_load_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n), 0)\n", - "format": "time_series", - "legendFormat": "% of time reduced by preloading", - "legendLink": null - } - ], - "title": "Series batch preloading efficiency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Blocks currently owned\nThis panel shows the number of blocks owned by each store-gateway replica.\nFor each owned block, the store-gateway keeps its index-header on disk, and\neventually loaded in memory (if index-header lazy loading is disabled, or lazy loading\nis 
enabled and the index-header was loaded).\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 29, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "cortex_bucket_store_blocks_loaded{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Blocks currently owned", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 30, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_bucket_store_block_loads_total{component=\"store-gateway\",cluster=~\"$cluster\", 
job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) - sum(rate(cortex_bucket_store_block_load_failures_total{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_bucket_store_block_load_failures_total{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Blocks loaded / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 31, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_bucket_store_block_drops_total{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) - sum(rate(cortex_bucket_store_block_drop_failures_total{component=\"store-gateway\",cluster=~\"$cluster\", 
job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_bucket_store_block_drop_failures_total{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Blocks dropped / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 32, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "cortex_bucket_store_indexheader_lazy_load_total{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"} - cortex_bucket_store_indexheader_lazy_unload_total{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Lazy loaded index-headers", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - 
"mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 33, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Index-header lazy load duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Index-header lazy load gate latency\nTime spent waiting for a turn to load an index header. 
This time is not included in \"Index-header lazy load duration.\"\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 34, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_bucket_stores_gate_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_bucket_stores_gate_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_bucket_stores_gate_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_bucket_stores_gate_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Index-header lazy load gate latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": 
"short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 35, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_bucket_store_series_hash_cache_hits_total{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum(rate(cortex_bucket_store_series_hash_cache_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "hit ratio", - "legendLink": null - } - ], - "title": "Series hash cache hit ratio", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 36, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": 
"sum(rate(thanos_store_index_cache_hits_total{item_type=\"ExpandedPostings\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum(rate(thanos_store_index_cache_requests_total{item_type=\"ExpandedPostings\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "hit ratio", - "legendLink": null - } - ], - "title": "ExpandedPostings cache hit ratio", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 37, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_cache_memory_hits_total{name=\"chunks-attributes-cache\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum(rate(cortex_cache_memory_requests_total{name=\"chunks-attributes-cache\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "hit ratio", - "legendLink": null - } - ], - "title": "Chunks attributes in-memory cache hit ratio", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": 
"default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Queries", - "uid": "b3abe8d5c040395cc36615cb4334c92d", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-queries.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-reads-networking.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - 
"links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": 
true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Summary", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": 
"{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"})", - "format": "time_series", - "legendFormat": "highest", - 
"legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": 
"Bps" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - 
"span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler", - 
"titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - 
"fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": 
"min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) 
(rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - 
"expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Store-gateway", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": 
"never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": 
"never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ruler", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - 
"allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Reads networking", - "uid": "54b2a0a4748b3bd1aefa92ce5559a1c2", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-reads-networking.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-reads-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - 
"steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": 
"multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Summary", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - 
"legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - 
"legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - 
"matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - 
"overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - 
"showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - 
"legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - 
"expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - 
"showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": 
"min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (RSS)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"ingester\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Rules", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", 
- "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ruler", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - 
"unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - 
"showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - 
{ - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Store-gateway", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - 
"unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (RSS)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - 
}, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"store-gateway\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 26, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"store-gateway\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk reads", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { 
- "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 27, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(store-gateway).*\"\n }\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": 
false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Reads resources", - "uid": "cc86fd5aa9301c6528986572ad974db9", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-reads-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-reads.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "175px", - "panels": [ - { - "content": "

\n This dashboard shows health metrics for the read path.\n It is broken into sections for each service on the read path, and organized by the order in which the read request flows.\n
\n Incoming queries travel from the gateway → query frontend → query scheduler → querier → ingester and/or store-gateway (depending on the time range of the query).\n
\n For each service, there are 3 panels showing (1) requests per second to that service, (2) average, median, and p99 latency of requests to that service, and (3) p99 latency of requests to each instance of that service.\n

\n

\n The dashboard also shows metrics for the 4 optional caches that can be deployed:\n the query results cache, the metadata cache, the chunks cache, and the index cache.\n
\n These panels will show “no data” if the caches are not deployed.\n

\n

\n Lastly, it also includes metrics for how the ingester and store-gateway interact with object storage.\n

\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 12, - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Reads dashboard description", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "100px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Instant queries per second\nRate of instant queries per second being made to the system.\nIncludes both queries made to the /prometheus API as\nwell as queries from the ruler.\n\n", - "fill": 1, - "format": "reqps", - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(\n rate(\n cortex_request_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",\n route=~\"(prometheus|api_prom)_api_v1_query\"\n }[$__rate_interval]\n )\n or\n rate(\n cortex_prometheus_rule_evaluations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Instant queries / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - 
"label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Range queries per second\nRate of range queries per second being made to\nMimir via the /prometheus API.\n\n", - "fill": 1, - "format": "reqps", - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_query_range\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Range queries / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### \"Label names\" queries per second\nRate of \"label names\" endpoint queries per second being made to\nMimir 
via the /prometheus API.\n\n", - "fill": 1, - "format": "reqps", - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_labels\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Label names queries / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### \"Label values\" queries per second\nRate of specific \"label values\" endpoint queries per second being made to\nMimir via the /prometheus API.\n\n", - "fill": 1, - "format": "reqps", - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - 
"seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_label_name_values\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Label values queries / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Series queries per second\nRate of series queries per second being made to\nMimir via the /prometheus API.\n\n", - "fill": 1, - "format": "reqps", - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_series\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": 
"70,80", - "timeFrom": null, - "timeShift": null, - "title": "Series queries / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - 
"matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { 
- "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Requests / sec\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 10, - "links": [ ], - "options": { - 
"legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Latency (Time in Queue)\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (Time in Queue)", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Queue length\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "queries" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "sum(min_over_time(cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__interval]))", - "format": "time_series", - "legendFormat": "Queue length", - "legendLink": null - } - ], - "title": "Queue length", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### 99th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "99th Percentile: {{ additional_queue_dimensions }}", - "refId": "A" - } - ], - "title": "99th Percentile Latency by Queue Dimension", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### 50th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "50th Percentile: {{ additional_queue_dimensions }}", - "refId": "A" - } - ], - "title": "50th Percentile Latency by Queue Dimension", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Average Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (additional_queue_dimensions) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (additional_queue_dimensions), \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "Average: {{ additional_queue_dimensions }}", - "refId": "C" - } - ], - "title": "Average Latency by Queue Dimension", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler Latency (Time in Queue) Breakout by Additional Queue Dimensions", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - 
"tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum (\n rate(thanos_memcached_operations_total{name=\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{name=\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "Requests/s", - "legendLink": null - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Cache – query results", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": 
"absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_querier_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / 
sum(cluster_job_route:cortex_querier_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_querier_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 
* sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - }, - { - 
"collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": 
"fixed" - } - } - ] - } - ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",route=~\"/gatewaypb.StoreGateway/.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - 
"refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 26, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Store-gateway", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - 
"stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 27, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n 
label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 28, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval])) * 1e3 / 
sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Store-gateway – key-value store for store-gateways ring", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 29, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(operation) (\n # Backwards compatibility\n rate(\n thanos_memcached_operations_total{\n component=\"store-gateway\",\n name=\"index-cache\",\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n or ignoring(backend)\n rate(\n thanos_cache_operations_total{\n component=\"store-gateway\",\n name=\"index-cache\",\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": 
"Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 30, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - 
"intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (getmulti)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Hit ratio\nEven if you do not set up memcached for the blocks index cache, you will still see data in this panel because the store-gateway by default has an\nin-memory blocks index cache.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 31, - 
"links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(item_type) (\n rate(\n thanos_store_index_cache_hits_total{\n component=\"store-gateway\",\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n/\nsum by(item_type) (\n rate(\n thanos_store_index_cache_requests_total{\n component=\"store-gateway\",\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n", - "format": "time_series", - "legendFormat": "{{item_type}}", - "legendLink": null - } - ], - "title": "Hit ratio", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memcached – block index cache (store-gateway accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 32, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(operation) (\n # Backwards compatibility\n rate(thanos_memcached_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{\n cluster=~\"$cluster\", 
job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 33, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n 
cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (getmulti)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - 
}, - "overrides": [ ] - }, - "id": 34, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_cache_memcached_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n)\n/\nsum(\n # Backwards compatibility\n rate(thanos_cache_memcached_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "items", - "legendLink": null - } - ], - "title": "Hit ratio", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memcached – chunks cache (store-gateway accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 35, - "links": [ ], - "options": { - "legend": { - "showLegend": 
true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(operation) (\n # Backwards compatibility\n rate(thanos_memcached_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 36, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - 
"intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n)\n", - "format": "time_series", - 
"intervalFactor": 2, - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (getmulti)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 37, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_cache_memcached_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n/\nsum(\n # Backwards compatibility\n rate(thanos_cache_memcached_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "items", - "legendLink": null - } - ], - "title": "Hit ratio", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memcached – metadata cache (store-gateway accesses)", - "titleSize": "h6" - }, - { - 
"collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 38, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(operation) (\n # Backwards compatibility\n rate(thanos_memcached_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 39, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", 
job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n 
component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (getmulti)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 40, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_cache_memcached_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n/\nsum(\n # Backwards compatibility\n rate(thanos_cache_memcached_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - 
"legendFormat": "items", - "legendLink": null - } - ], - "title": "Hit ratio", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memcached – metadata cache (querier accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 41, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 42, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - 
], - "title": "Error rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 43, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - 
"label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 44, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": 
"short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Blocks object store (store-gateway accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 45, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - 
"legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Get", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 46, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval]))", - "format": 
"time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: GetRange", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 47, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval]))", - 
"format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Upload", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 48, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 49, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 50, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"querier\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Error rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 51, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 52, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Blocks object store (querier accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 53, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Get", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 54, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: GetRange", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 55, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Upload", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 56, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - 
"regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Reads", - "uid": "e327503188913dc38ad571c647eef643", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-reads.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-remote-ruler-reads-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": 
"dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": 
"custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"ruler-query-frontend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend (dedicated to ruler)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"} / 
container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler (dedicated to ruler)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": 
"request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": 
"desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier (dedicated to ruler)", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - 
}, - "timezone": "utc", - "title": "Mimir / Remote ruler reads resources", - "uid": "1940f6ef765a506a171faa2056c956c3", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-remote-ruler-reads-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-remote-ruler-reads.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "175px", - "panels": [ - { - "content": "

\n This dashboard shows health metrics for the ruler read path when remote operational mode is enabled.\n It is broken into sections for each service on the ruler read path, and organized by the order in which the read request flows.\n
\n For each service, there are three panels showing (1) requests per second to that service, (2) average, median, and p99 latency of requests to that service, and (3) p99 latency of requests to each instance of that service.\n

\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 12, - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Remote ruler reads dashboard description", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "100px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Evaluations per second\nRate of rule expressions evaluated per second.\n\n", - "fill": 1, - "format": "reqps", - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(\n rate(\n cortex_request_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\",\n route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"\n }[$__rate_interval]\n )\n)\n", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Evaluations / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": 
false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": 
"color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th 
percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend (dedicated to ruler)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Requests / sec\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 6, - "links": [ ], - "options": { - 
"legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Latency (Time in Queue)\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (Time in Queue)", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Queue length\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "queries" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "sum(min_over_time(cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__interval]))", - "format": "time_series", - "legendFormat": "Queue length", - "legendLink": null - } - ], - "title": "Queue length", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler (dedicated to ruler)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### 99th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "99th Percentile: {{ additional_queue_dimensions }}", - "refId": "A" - } - ], - "title": "99th Percentile Latency by Queue Dimension", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### 50th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "50th Percentile: {{ additional_queue_dimensions }}", - "refId": "A" - } - ], - "title": "50th Percentile Latency by Queue Dimension", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Average Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (additional_queue_dimensions) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (additional_queue_dimensions), \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "Average: {{ additional_queue_dimensions }}", - "refId": "C" - } - ], - "title": "Average Latency by Queue Dimension", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler Latency (Time in Queue) Breakout by Additional Queue Dimensions", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - 
"value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", 
\"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_querier_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / sum(cluster_job_route:cortex_querier_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - 
"fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_querier_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier (dedicated to ruler)", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - 
"datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Remote ruler reads", - "uid": "f103238f7f5ab2f1345ce650cbfbfe2f", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-remote-ruler-reads.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-rollout-progress.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "fillOpacity": 80, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineWidth": 1, - "scaleDistribution": { - "type": "linear" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": 
[ ], - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Ready" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Updated" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "blue", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 13, - "w": 10, - "x": 0, - "y": 0 - }, - "id": 1, - "links": [ ], - "options": { - "barRadius": 0, - "barWidth": 0.96999999999999997, - "fullHighlight": false, - "groupWidth": 0.69999999999999996, - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "orientation": "horizontal", - "showValue": "auto", - "stacking": "none", - "tooltip": { - "mode": "multi", - "sort": "none" - }, - "xField": "Workload", - "xTickLabelRotation": 0, - "xTickLabelSpacing": 0 - }, - "targets": [ - { - "expr": "(\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas_updated{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas_updated{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n /\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n) and (\n sum by (workload) (\n label_replace(label_replace(label_replace(\n 
kube_deployment_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n > 0\n)\n", - "format": "table", - "instant": true, - "intervalFactor": null, - "legendFormat": "__auto", - "legendLink": null, - "step": null - }, - { - "expr": "(\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas_ready{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas_ready{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n /\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n) and (\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n > 0\n)\n", - "format": "table", - "instant": true, - "intervalFactor": null, - "legendFormat": "__auto", - "legendLink": null, - "step": null - } - ], - "title": "Rollout progress", - "transformations": [ - { - "id": "joinByField", 
- "options": { - "byField": "workload", - "mode": "outer" - } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time 1": true, - "Time 2": true - }, - "renameByName": { - "Value #A": "Updated", - "Value #B": "Ready", - "workload": "Workload" - } - } - }, - { - "id": "sortBy", - "options": { - "sort": [ - { - "field": "Workload" - } - ] - } - } - ], - "type": "barchart" - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 10, - "y": 0 - }, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Writes - 2xx", - "tooltip": { - 
"shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 0.20000000000000001 - }, - { - "color": "red", - "value": 0.5 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 12, - "y": 0 - }, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - 
"step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Writes - 4xx", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 0.01 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 14, - "y": 0 - }, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - 
"intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Writes - 5xx", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 0.20000000000000001 - }, - { - "color": "red", - "value": 0.5 - } - ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 8, - "x": 16, - "y": 0 - }, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - 
"step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Writes 99th latency", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 10, - "y": 4 - }, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": 
"", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Reads - 2xx", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 0.01 - }, - { - "color": "red", - "value": 0.050000000000000003 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 12, - "y": 4 - }, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", 
route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Reads - 4xx", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 0.01 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 14, - "y": 4 - }, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Reads - 5xx", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 1 - }, - { - "color": "red", - "value": 2.5 - } - ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 8, - "x": 16, - "y": 4 - }, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", 
route=~\"(prometheus|api_prom)_api_v1_.+\"}))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Reads 99th latency", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 0, - "noValue": "All healthy", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 1 - }, - { - "color": "red", - "value": 2 - } - ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 3, - "w": 10, - "x": 0, - "y": 13 - }, - "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "text": { - "titleSize": 14, - "valueSize": 14 - }, - "textMode": "value_and_name" - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "kube_deployment_status_replicas_unavailable{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n> 0\n", - "format": null, - "instant": true, - "interval": "", - 
"intervalFactor": null, - "legendFormat": "{{deployment}}", - "legendLink": null, - "step": null - }, - { - "expr": "kube_statefulset_status_replicas_current{cluster=~\"$cluster\", namespace=~\"$namespace\"} -\nkube_statefulset_status_replicas_ready {cluster=~\"$cluster\", namespace=~\"$namespace\"}\n> 0\n", - "format": null, - "instant": true, - "interval": "", - "intervalFactor": null, - "legendFormat": "{{statefulset}}", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Unhealthy pods", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "r.*" - }, - "properties": [ - { - "id": "custom.align", - "value": "center" - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 6, - "x": 10, - "y": 8 - }, - "id": 11, - "targets": [ - { - "expr": "count by(container, version) (\n label_replace(\n kube_pod_container_info{cluster=~\"$cluster\", namespace=~\"$namespace\"},\n \"version\", \"$1\", \"image\", \".*:(.*)\"\n )\n)\n", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "title": "Pods count per version", - "transformations": [ - { - "id": "labelsToFields", - "options": { - "valueLabel": "version" - } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true - }, - "indexByName": { - "Time": 0, - "container": 1 - } - } - }, - { - "id": "sortBy", - "options": { - "fields": { }, - "sort": [ - { - "field": "container" - } - ] - } - } - ], - "type": "table" - }, - { - 
"datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 10 - }, - "unit": "percentunit" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 8 - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}))[1h:])\n)\n", - "format": "time_series", - "legendFormat": "writes", - "legendLink": null - }, - { - "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}))[1h:])\n)\n", - "format": "time_series", - "legendFormat": "reads", - "legendLink": null - } - ], - "title": "Latency vs 24h ago", - "type": "timeseries" - } - ], - "refresh": "10s", - "rows": null, - "schemaVersion": 27, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - 
"options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Rollout progress", - "uid": "7f0b5567d543a1698e695b530eb7f5de", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-rollout-progress.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-ruler.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ 
- "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "100px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cortex_ruler_managers_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Active configurations", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - 
"seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Total rules", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Read from ingesters - QPS\nNote: Even while operating in Remote ruler mode you will still see values for this panel.\n\nThis is because the metrics are inclusive of intermediate services and are showing the requests that ultimately reach the ingesters.\n\nFor a more detailed view of the read path when using remote ruler mode, see the Remote ruler reads dashboard.\n\n", - "fill": 1, - "format": "reqps", - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Read from ingesters - QPS", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "reqps", - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Write to ingesters - QPS", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - 
"label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "success", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - }, - 
{ - "expr": "sum(rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "missed", - "legendLink": null - } - ], - "title": "Evaluations per second", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum (rate(cortex_prometheus_rule_evaluation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum (rate(cortex_prometheus_rule_evaluation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "average", - "legendLink": null - } - ], - "title": "Latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Rule evaluations global", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - 
"matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", 
operation=\"/cortex.Ingester/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "QPS", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", 
operation=\"/cortex.Ingester/Push\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Writes (ingesters)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { 
- "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "QPS", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, 
sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Reads (ingesters)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - 
"mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n 
label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ruler - key-value store for rulers ring", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(cortex_querier_storegateway_instances_hit_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_instances_hit_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Number of store-gateways hit per query", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(cortex_querier_storegateway_refetches_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_refetches_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Refetches of missing blocks per query", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Consistency checks failed\nRate of queries that had to run with consistency checks and those checks failed. A failed consistency check means that some of at least one block which had to be queried wasn't present in any of the store-gateways.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Failures / sec" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_querier_blocks_consistency_checks_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) / 
sum(rate(cortex_querier_blocks_consistency_checks_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Failures / sec", - "legendLink": null - } - ], - "title": "Consistency checks failed", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ruler - blocks storage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(user) (rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum by(user) (rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]) > 0)\n> 0\n", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Delivery errors", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": 
"percentunit" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(user) (rate(cortex_prometheus_notifications_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum by(user) (rate(cortex_prometheus_notifications_queue_capacity{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0\n", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Queue length", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (user) (increase(cortex_prometheus_notifications_dropped_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0\n", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Dropped", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Notifications", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - 
"pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(user) (rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Missed iterations", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "rate(cortex_prometheus_rule_group_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])\n /\nrate(cortex_prometheus_rule_group_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])\n", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": 
false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(rule_group) (rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "{{ rule_group }}", - "legendLink": null - } - ], - "title": "Failures", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Group evaluations", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "sum by(user) (rate(cortex_prometheus_rule_evaluation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum by(user) (rate(cortex_prometheus_rule_evaluation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Latency", - "type": "timeseries" - } - ], - 
"repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Rule evaluation per user", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Error rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - 
"custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - 
"fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 26, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - 
"repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ruler configuration object store (ruler accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 27, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Get", - "type": "timeseries", - "yaxes": [ - { - 
"format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 28, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: GetRange", - "type": 
"timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 29, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Upload", - 
"type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 30, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: 
Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Ruler", - 
"uid": "631e15d5d85afb2ca8e35d62984eeaa0", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-ruler.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-scaling.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "200px", - "panels": [ - { - "id": 1, - "options": { - "content": "This dashboard identifies scaling-related issues by suggesting services that you might want to scale up.\nThe table that follows contains a suggested number of replicas and the reason why.\nIf the system is failing and depending on the reason, try scaling up to the specified number.\nThe specified numbers are intended as helpful guidelines when things go wrong, rather than prescriptive guidelines.\n\nReasons:\n- **sample_rate**: There are not enough replicas to handle the\n sample rate. Applies to distributor and ingesters.\n- **active_series**: There are not enough replicas\n to handle the number of active series. Applies to ingesters.\n- **cpu_usage**: There are not enough replicas\n based on the CPU usage of the jobs vs the resource requests.\n Applies to all jobs.\n- **memory_usage**: There are not enough replicas based on the memory\n usage vs the resource requests. 
Applies to all jobs.\n- **active_series_limits**: There are not enough replicas to hold 60% of the\n sum of all the per tenant series limits.\n- **sample_rate_limits**: There are not enough replicas to handle 60% of the\n sum of all the per tenant rate limits.\n", - "mode": "markdown" - }, - "span": 12, - "title": "", - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Service scaling", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "400px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 0, - "desc": false - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "Required Replicas", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "Cluster", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "__name__", - "thresholds": [ ], - "type": "hidden", - "unit": "short" - }, - { - "alias": "Cluster", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - 
"link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "cluster", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "Service", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "deployment", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "Namespace", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "namespace", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "Reason", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "reason", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "sort_desc(\n cluster_namespace_deployment_reason:required_replicas:count{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n > ignoring(reason) group_left\n cluster_namespace_deployment:actual_replicas:count{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\n", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Workload-based scaling", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - 
"yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Scaling", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Scaling", - "uid": 
"64bbad83507b7289b514725658e10352", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-scaling.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-slow-queries.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | 
user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Response time", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Fetched series", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - 
"unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Fetched chunks", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": 
"quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Response size", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Time span", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "description": "### Query wall time\nSeconds per second spent by queriers evaluating queries.\nThis is roughly the product of the number of 
subqueries for a query and how long they took.\nIn increase in this metric means that queries take more resources from the query path to evaluate.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap query_wall_time_seconds [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap query_wall_time_seconds [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Query wall time", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Accross tenants", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, 
- "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 response time", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 fetched series", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, 
- "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 fetched chunks", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 response size", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - 
"pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 time span", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "description": "### Query wall time\nSeconds per second spent by queriers evaluating queries.\nThis is roughly the product of the number of subqueries for a query and how long they took.\nIn increase in this metric means that queries take more resources from the query path to evaluate.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | 
unwrap query_wall_time_seconds [$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 query wall time", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Top 10 tenants", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 response time", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, 
- "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 fetched series", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 fetched chunks", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { 
- "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 response size", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 time span", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "description": "### Query wall time\nSeconds per second spent by queriers evaluating queries.\nThis is roughly the product of the number of subqueries for a query and how long they took.\nIn increase in this metric means that queries take more resources from the query path to evaluate.\n\n", - "fieldConfig": { - 
"defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap query_wall_time_seconds [$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 query wall time", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Top 10 User-Agents", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "fetched_chunk_bytes" - }, - "properties": [ - { - "id": "unit", - "value": "bytes" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "fetched_index_bytes" - }, - "properties": [ - { - "id": "unit", - "value": "bytes" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "response_size_bytes" - }, - "properties": [ - { - "id": "unit", - "value": "bytes" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "results_cache_hit_bytes" - }, - "properties": [ - { - "id": "unit", - "value": "bytes" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "results_cache_miss_bytes" - }, - "properties": [ - { - "id": "unit", - "value": 
"bytes" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "estimated_series_count" - }, - "properties": [ - { - "id": "unit", - "value": "short" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "fetched_chunks_count" - }, - "properties": [ - { - "id": "unit", - "value": "short" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "fetched_series_count" - }, - "properties": [ - { - "id": "unit", - "value": "short" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Time span" - }, - "properties": [ - { - "id": "unit", - "value": "s" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Duration" - }, - "properties": [ - { - "id": "unit", - "value": "s" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Step" - }, - "properties": [ - { - "id": "unit", - "value": "s" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "queue_time_seconds" - }, - "properties": [ - { - "id": "unit", - "value": "s" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "query_wall_time_seconds" - }, - "properties": [ - { - "id": "unit", - "value": "s" - } - ] - } - ] - }, - "height": "500px", - "id": 19, - "span": 12, - "targets": [ - { - "expr": "{cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | label_format response_time_seconds=\"{{ if .response_time }} {{ duration .response_time }} {{ end }}\",param_step_seconds=\"{{ if .param_step }} {{ div .param_step 1000 }} {{ end }}\",length_seconds=\"{{ if .length }} {{ duration .length }} {{ end }}\"", - "instant": false, - "legendFormat": "", - "range": true, - "refId": "A" - } - ], - "title": "Slow queries", - "transformations": [ - { - "id": "extractFields", - "options": { - "source": "labels" - } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Line": true, - "Time": 
true, - "caller": true, - "cluster": true, - "component": true, - "container": true, - "gossip_ring_member": true, - "host": true, - "id": true, - "job": true, - "labels": true, - "length": true, - "level": true, - "line": true, - "method": true, - "msg": true, - "name": true, - "namespace": true, - "param_step": true, - "path": true, - "pod": true, - "pod_template_hash": true, - "response_time": true, - "stream": true, - "traceID": true, - "tsNs": true - }, - "indexByName": { - "err": 10, - "length_seconds": 3, - "param_end": 5, - "param_query": 8, - "param_start": 4, - "param_step_seconds": 7, - "param_time": 6, - "response_time_seconds": 9, - "status": 1, - "ts": 0, - "user": 2 - }, - "renameByName": { - "err": "Error", - "length_seconds": "Time span", - "param_end": "End", - "param_query": "Query", - "param_start": "Start", - "param_step_seconds": "Step", - "param_time": "Time (instant query)", - "response_time_seconds": "Duration", - "ts": "Completion date", - "user": "Tenant ID" - } - } - }, - { - "id": "convertFieldType", - "options": { - "conversions": [ - { - "destinationType": "number", - "targetField": "sharded_queries" - }, - { - "destinationType": "number", - "targetField": "split_queries" - }, - { - "destinationType": "number", - "targetField": "fetched_chunk_bytes" - }, - { - "destinationType": "number", - "targetField": "fetched_index_bytes" - }, - { - "destinationType": "number", - "targetField": "response_size_bytes" - }, - { - "destinationType": "number", - "targetField": "results_cache_hit_bytes" - }, - { - "destinationType": "number", - "targetField": "results_cache_miss_bytes" - }, - { - "destinationType": "number", - "targetField": "estimated_series_count" - }, - { - "destinationType": "number", - "targetField": "fetched_chunks_count" - }, - { - "destinationType": "number", - "targetField": "fetched_series_count" - }, - { - "destinationType": "number", - "targetField": "Time span" - }, - { - "destinationType": "number", - "targetField": 
"Duration" - }, - { - "destinationType": "number", - "targetField": "Step" - }, - { - "destinationType": "number", - "targetField": "queue_time_seconds" - }, - { - "destinationType": "number", - "targetField": "query_wall_time_seconds" - } - ] - } - } - ], - "type": "table" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "hide": 0, - "includeAll": false, - "label": "Loki data source", - "multi": false, - "name": "loki_datasource", - "query": "loki", - "type": "datasource" - }, - { - "current": { - "selected": true, - "text": "5s", - "value": "5s" - }, - "hide": 0, - "label": "Min duration", - "name": "min_duration", - "options": [ - { - "selected": true, - "text": "5s", - "value": 
"5s" - } - ], - "query": "5s", - "type": "textbox" - }, - { - "current": { - "selected": true, - "text": ".*", - "value": ".*" - }, - "hide": 0, - "label": "Tenant ID", - "name": "tenant_id", - "options": [ - { - "selected": true, - "text": ".*", - "value": ".*" - } - ], - "query": ".*", - "type": "textbox" - }, - { - "current": { - "selected": true, - "text": ".*", - "value": ".*" - }, - "hide": 0, - "label": "User-Agent HTTP Header", - "name": "user_agent", - "options": [ - { - "selected": true, - "text": ".*", - "value": ".*" - } - ], - "query": ".*", - "type": "textbox" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Slow queries", - "uid": "6089e1ce1e678788f46312a0a1e647e6", - "version": 0 - } -kind: ConfigMap + memcached-address: bWVtY2FjaGVkLm1lbWNhY2hlZC1zeXN0ZW0uc3ZjLmNsdXN0ZXIubG9jYWw6MTEyMTE= +kind: Secret metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-slow-queries.json + name: integrations-memcached namespace: monitoring-system +type: Opaque --- apiVersion: v1 data: - mimir-tenants.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "25px", - "panels": [ - { - "content": "

\n This dashboard shows various metrics detailed by tenant (user) selected above.\n

\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 12, - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Tenants dashboard description", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### All series\nNumber of active, in-memory, and owned series per user, and active series matching custom trackers (in parenthesis).\nNote that these counts include all series regardless of the type of data (counter, gauge, native histogram, etc.).\nNote that active series matching custom trackers are included in the total active series count.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(\n (\n cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n - cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "in-memory", - "legendLink": null - }, - { - "expr": "max(cortex_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"max_global_series_per_user\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"max_global_series_per_user\"})\n", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "sum(\n cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "active", - "legendLink": null - }, - { - "expr": "sum(\n cortex_ingester_owned_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "owned", - "legendLink": null - }, - { - "expr": "sum by (name) (\n cortex_ingester_active_series_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n) > 0\n", - "format": "time_series", - "legendFormat": "active ({{ name }})", - "legendLink": null - } - ], - "title": "All series", - 
"type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### In-memory series per ingester\nLocal tenant series limit and number of in-memory series per ingester.\nBecause series can be unevenly distributed across ingesters, ingesters may hit the local limit at different times.\nNote that in-memory series may exceed the local limit if limiting based on owned series is enabled.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/local limit .+/" - }, - "properties": [ - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - }, - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "min by (job) (cortex_ingester_local_limits{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", limit=\"max_global_series_per_user\", user=\"$user\"})\n", - "format": "time_series", - "legendFormat": "local limit ({{job}})", - "legendLink": null - }, - { - "expr": "cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n- cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "In-memory series per ingester", - "type": "timeseries" - }, - { - "datasource": 
"$datasource", - "description": "### Owned series per ingester\nLocal tenant series limit and number of owned series per ingester.\nBecause series can be unevenly distributed across ingesters, ingesters may hit the local limit at different times.\nOwned series are the subset of an ingester's in-memory series that currently map to it in the ring\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/local limit .+/" - }, - "properties": [ - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - }, - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "min by (job) (cortex_ingester_local_limits{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", limit=\"max_global_series_per_user\", user=\"$user\"})\n", - "format": "time_series", - "legendFormat": "local limit ({{job}})", - "legendLink": null - }, - { - "expr": "cortex_ingester_owned_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Owned series per ingester", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Tenant series counts", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - 
"description": "### Series with exemplars\nNumber of series with exemplars currently in storage.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "series", - "legendLink": null - } - ], - "title": "Series with exemplars", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Oldest exemplar age\nThe age of the oldest exemplar stored in circular storage.\nUseful to check for what time range the current exemplar buffer limit allows.\nThis usually means the max age for all exemplars for a typical setup.\nThis is not true though if one of the series timestamp is in future compared to rest series.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - 
"tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "time() - min(cortex_ingester_tsdb_exemplar_last_exemplars_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"} > 0)", - "format": "time_series", - "legendFormat": "age", - "legendLink": null - } - ], - "title": "Oldest exemplar age", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Native histogram series\nNumber of active native histogram series per user, and active native histogram series matching custom trackers (in parenthesis).\nNote that active series matching custom trackers are included in the total active series count.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n cortex_ingester_active_native_histogram_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "active", - "legendLink": null - }, - { - "expr": "sum by (name) (\n 
cortex_ingester_active_native_histogram_series_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n) > 0\n", - "format": "time_series", - "legendFormat": "active ({{ name }})", - "legendLink": null - } - ], - "title": "Native histogram series", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Total number of buckets used by native histogram series\nTotal number of buckets in active native histogram series per user, and total active native histogram buckets matching custom trackers (in parenthesis).\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n cortex_ingester_active_native_histogram_buckets{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "buckets", - 
"legendLink": null - }, - { - "expr": "sum by (name) (\n cortex_ingester_active_native_histogram_buckets_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n) > 0\n", - "format": "time_series", - "legendFormat": "buckets ({{ name }})", - "legendLink": null - } - ], - "title": "Total number of buckets used by native histogram series", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Exemplars and native histograms", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Distributor requests incoming rate\nThe rate of requests that have come in to the distributor, including rejected requests.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_requests_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Distributor requests incoming rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor requests 
received (accepted) rate\nThe rate of received requests, excluding rejected requests.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_received_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - }, - { - "expr": "max(cortex_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"request_rate\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"request_rate\"})\n", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "Distributor requests received (accepted) rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Newest seen sample age\nThe age of the newest received sample seen in the distributors.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - 
"thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "time() - max(cortex_distributor_latest_seen_sample_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"} > 0)", - "format": "time_series", - "legendFormat": "age", - "legendLink": null - } - ], - "title": "Newest seen sample age", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor discarded requests rate\nThe rate of each request's discarding reason.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_discarded_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{ reason }}", - "legendLink": null - } - ], - "title": "Distributor discarded requests rate", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor ingestion requests", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Distributor samples incoming rate\nThe rate of samples that 
have come in to the distributor, including rejected or deduped exemplars.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_samples_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Distributor samples incoming rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor samples received (accepted) rate\nThe rate of received samples, excluding rejected and deduped samples.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - }, - { - "expr": "max(cortex_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"ingestion_rate\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"ingestion_rate\"})\n", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "Distributor samples received (accepted) rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor deduplicated/non-HA\nThe rate of deduplicated samples and the rate of received samples for a user that has HA tracking turned on, but the sample didn't contain both HA labels.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_deduped_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "deduplicated", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_distributor_non_ha_samples_received_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "non-HA", - 
"legendLink": null - } - ], - "title": "Distributor deduplicated/non-HA", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor and ingester discarded samples rate\nThe rate of each sample's discarding reason.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{ reason }} (distributor)", - "legendLink": null - }, - { - "expr": "sum by (reason) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{ reason }} (ingester)", - "legendLink": null - } - ], - "title": "Distributor and ingester discarded samples rate", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Samples ingestion funnel", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Distributor exemplars incoming rate\nThe rate of exemplars that have come in to the distributor, including rejected or deduped exemplars.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - 
"pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_exemplars_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Distributor exemplars incoming rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor exemplars received (accepted) rate\nThe rate of received exemplars, excluding rejected and deduped exemplars.\nThis number can be sensibly lower than incoming rate because we dedupe the HA sent exemplars, and then reject based on time.\nSee discarded rate for reasons why exemplars are being discarded.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_received_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Distributor exemplars received 
(accepted) rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor discarded exemplars rate\nThe rate of each exmplars' discarding reason.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_discarded_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{ reason }}", - "legendLink": null - } - ], - "title": "Distributor discarded exemplars rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Ingester appended exemplars rate\nTotal number of exemplars appended in the ingesters.\nThis can be lower than ingested exemplars rate since TSDB does not append the same exemplar twice, and those can be frequent.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n rate(cortex_ingester_tsdb_exemplar_exemplars_appended_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval])\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Ingester appended exemplars rate", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Exemplars ingestion funnel", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Symbol table size for loaded blocks\nSize of symbol table in memory for loaded blocks, averaged by ingester.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (job) (cortex_ingester_tsdb_symbol_table_size_bytes{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "{{ job }}", - "legendLink": null - } - ], - "title": "Symbol table size for loaded blocks", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Space used by local blocks\nThe number of bytes that are currently used for local storage by all blocks.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 
1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (job) (cortex_ingester_tsdb_storage_blocks_bytes{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "{{ job }}", - "legendLink": null - } - ], - "title": "Space used by local blocks", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingesters' storage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Number of groups\nTotal number of rule groups for a tenant.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "count(sum by (rule_group) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", 
user=\"$user\"}))", - "format": "time_series", - "legendFormat": "groups", - "legendLink": null - }, - { - "expr": "max(cortex_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"ruler_max_rule_groups_per_tenant\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"ruler_max_rule_groups_per_tenant\"})\n", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "Number of groups", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Number of rules\nTotal number of rules for a tenant.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "rules", - "legendLink": null - } - ], - "title": "Number of rules", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "options": { - "legend": { - 
"showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Total evaluations rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 26, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (rule_group) (rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "{{ rule_group }}", - "legendLink": null - } - ], - "title": "Failed evaluations rate", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Rules", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 27, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - 
"points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "rules", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (rule_group) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit biggest groups", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 28, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - 
"nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "seconds", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (rule_group) (cortex_prometheus_rule_group_last_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit slowest groups (last evaluation)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Top rules", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": 
"$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 29, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Sent notifications rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "rate" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 30, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Failed notifications rate", - "type": "timeseries" - } - ], - "repeat": null, - 
"repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Notifications", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 31, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (user) (cortex_alertmanager_alerts{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "alerts", - "legendLink": null - }, - { - "expr": "sum by (user) (cortex_alertmanager_silences{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "silences", - "legendLink": null - } - ], - "title": "Alerts", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": 
"#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 32, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "(\nsum(rate(cortex_alertmanager_notifications_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))\n-\non() (sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) or on () vector(0))\n) > 0\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "NPS", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 33, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "(\nsum(rate(cortex_alertmanager_notifications_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration)\n-\n(sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", 
user=\"$user\"}[$__rate_interval])) by(integration) or\n (sum(rate(cortex_alertmanager_notifications_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration) * 0)\n)) > 0\n", - "format": "time_series", - "legendFormat": "success - {{ integration }}", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration)", - "format": "time_series", - "legendFormat": "failed - {{ integration }}", - "legendLink": null - } - ], - "title": "NPS by integration", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alertmanager", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 34, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_query_frontend_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Queries / Sec", - "legendLink": null - } - ], - "title": "Rate of Read Requests - query-frontend", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - 
"fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 35, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "Queue Length", - "legendLink": null - } - ], - "title": "Number of Queries Queued - query-scheduler", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Read Path - Queries (User)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 36, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_query_frontend_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Queries / Sec", - "legendLink": null - } - ], - "title": "Rate of Read Requests - ruler-query-frontend", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - 
"drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 37, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "Queue Length", - "legendLink": null - } - ], - "title": "Number of Queries Queued - ruler-query-scheduler", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Read Path - Queries (Ruler)", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Estimated Compaction Jobs\nEstimated number of compaction jobs for selected user, based on latest version of bucket index. When user sends data, ingesters upload new user blocks every 2 hours\n(shortly after 01:00 UTC, 03:00 UTC, 05:00 UTC, etc.), and compactors should process all of the blocks within 2h interval.\nIf this graph regularly goes to zero (or close to zero) in 2 hour intervals, then compaction for this user works correctly.\n\nDepending on the configuration, there are two types of jobs: `split` jobs and `merge` jobs. 
Split jobs will only show up when user is configured with positive number of `compactor_split_and_merge_shards`.\nValues for split and merge jobs are stacked.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 50, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 38, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (type) (cortex_bucket_index_estimated_compaction_jobs{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", user=\"$user\"})\nand ignoring(type)\n(sum(rate(cortex_bucket_index_estimated_compaction_jobs_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) == 0)\n", - "format": "time_series", - "legendFormat": "{{ job }}", - "legendLink": null - } - ], - "title": "Estimated Compaction Jobs", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Number of blocks\nNumber of blocks stored in long-term storage for this user.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 39, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by (user) (cortex_bucket_blocks_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", user=\"$user\"})\n", - "format": "time_series", - "legendFormat": "{{ job }}", - "legendLink": null - } - ], - "title": "Blocks", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Compactions", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "user", - "multi": false, - "name": "user", - "options": [ ], - "query": "label_values(cortex_ingester_active_series{cluster=~\"$cluster\", namespace=~\"$namespace\"}, user)", - "refresh": 1, - "regex": "", - "sort": 
1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "current": { - "selected": true, - "text": "10", - "value": "10" - }, - "hide": 0, - "includeAll": false, - "multi": false, - "name": "limit", - "options": [ - { - "selected": true, - "text": "10", - "value": "10" - }, - { - "selected": false, - "text": "50", - "value": "50" - }, - { - "selected": false, - "text": "100", - "value": "100" - }, - { - "selected": false, - "text": "500", - "value": "500" - }, - { - "selected": false, - "text": "1000", - "value": "1000" - } - ], - "type": "custom" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Tenants", - "uid": "35fa247ce651ba189debf33d7ae41611", - "version": 0 - } -kind: ConfigMap + mysql-host: bXlzcWwubXlzcWwtc3lzdGVtLnN2Yy5jbHVzdGVyLmxvY2Fs + mysql-password: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= + mysql-username: bGd0bXA= +kind: Secret metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-tenants.json + name: integrations-mysql namespace: monitoring-system +type: Opaque --- apiVersion: v1 data: - mimir-top-tenants.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": 
"25px", - "panels": [ - { - "content": "

\n This dashboard shows the top tenants based on multiple selection criterias.\n Rows are collapsed by default to avoid querying all of them.\n Use the templating variable \"limit\" above to select the amount of users to be shown.\n

\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 12, - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Top tenants dashboard description", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "series", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit,\n sum by (user) (\n cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n )\n)\n", - 
"format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by active series", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By active series", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "series", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - 
"pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} )\n)\n)", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by in-memory series (series created - series removed)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By in-memory series", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - 
}, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} )\n)\n\nand\ntopk($limit, sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ end())\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ end())\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} @ end())\n)\n - sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ start())\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ start())\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} @ start())\n)\n)\n", - 
"format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Top $limit users by in-memory series (series created - series removed) that grew the most between query range start and query range end", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By in-memory series growth", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "samples/s", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[5m])))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - 
"thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by received samples rate in last 5m", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By samples rate", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\nand\ntopk($limit,\n sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ end()))\n -\n sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ start()))\n)\n", - "format": "time_series", - "legendFormat": "{{ user }}", 
- "legendLink": null - } - ], - "title": "Top $limit users by received samples rate that grew the most between query range start and query range end", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By samples rate growth", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "samples/s", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[5m])))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - 
"title": "Top $limit users by discarded samples rate in last 5m", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By discarded samples rate", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\nand\ntopk($limit,\n sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ end()))\n -\n sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ start()))\n)\n", - "format": "time_series", - 
"legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Top $limit users by discarded samples rate that grew the most between query range start and query range end", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By discarded samples rate growth", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "series", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit,\n sum by (user) (\n cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n )\n)\n", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by series with exemplars", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By series with exemplars", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "exemplars/s", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": 
"", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (user) (rate(cortex_distributor_received_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[5m])))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by received exemplars rate in last 5m", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By exemplars rate", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 11, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 3, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - 
{ - "alias": "rules", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (rule_group, user) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit biggest groups", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By rule group size", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 12, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": 
"flot", - "seriesOverrides": [ ], - "sort": { - "col": 3, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "seconds", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (rule_group, user) (cortex_prometheus_rule_group_last_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit slowest groups (last evaluation)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By rule group evaluation time", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - 
"fill": 1, - "id": 13, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "Compaction Jobs", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit,\n sum by (user) (cortex_bucket_index_estimated_compaction_jobs{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"})\n and ignoring(user)\n (sum(rate(cortex_bucket_index_estimated_compaction_jobs_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) == 0)\n)\n", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by estimated compaction jobs from bucket-index", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - 
"format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By estimated compaction jobs from bucket-index", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "current": { - "selected": true, - "text": "10", - "value": "10" - }, - "hide": 0, - "includeAll": false, - "multi": false, - "name": "limit", - "options": [ - { - "selected": true, - "text": "10", - "value": "10" - }, - { - "selected": false, - "text": "50", - "value": "50" - }, - { - "selected": false, - "text": "100", - "value": "100" - } - 
], - "type": "custom" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Top tenants", - "uid": "bc6e12d4fe540e4a1785b9d3ca0ffdd9", - "version": 0 - } -kind: ConfigMap + redis-addr: cmVkaXMtbWFzdGVyLnJlZGlzLXN5c3RlbS5zdmMuY2x1c3Rlci5sb2NhbDo2Mzc5 + redis-password: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= +kind: Secret metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-top-tenants.json + name: integrations-redis namespace: monitoring-system +type: Opaque --- apiVersion: v1 data: - mimir-writes-networking.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - 
"expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - 
"format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Summary", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - 
"datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - 
"spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } 
- ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Writes networking", - "uid": "978c1cb452585c96697a238eaac7fe2d", - "version": 0 - } -kind: ConfigMap + MIMIR_S3_SECRET_ACCESS_KEY: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= +kind: Secret metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir labels: - grafana_dashboard: "1" - name: mimir-writes-networking.json + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/version: 2.11.0 + name: mimir-env-92ddctt858 namespace: monitoring-system +type: Opaque --- apiVersion: v1 -data: - mimir-writes-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { 
- "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Summary", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"} / 
container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"} > 0)", - 
"format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - 
"expr": "sum by(pod) (cortex_ingester_memory_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "In-memory series", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { 
- "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} > 0)", - "format": 
"time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (RSS)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": 
"min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n 
)\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"ingester\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"ingester\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk reads", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": 
"percentunit" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(ingester).*\"\n }\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - 
"regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Writes resources", - "uid": "bc9160e50b52e89e0e49c840fea3d379", - "version": 0 - } -kind: ConfigMap +kind: Service metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir labels: - grafana_dashboard: "1" - name: mimir-writes-resources.json + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent namespace: monitoring-system +spec: + internalTrafficPolicy: Cluster + ports: + - name: http-metrics + port: 80 + protocol: TCP + targetPort: 80 + - name: grpc-otlp + port: 4317 + protocol: TCP + targetPort: 4317 + - name: http-otlp + port: 4318 + protocol: TCP + targetPort: 4318 + - name: zipkin + port: 9411 + protocol: TCP + targetPort: 9411 + - name: jaeger-compact + port: 6831 + protocol: UDP + targetPort: 6831 + selector: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + type: ClusterIP --- apiVersion: v1 -data: - mimir-writes.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", 
- "rows": [ - { - "collapse": false, - "height": "125px", - "panels": [ - { - "content": "

\n This dashboard shows various health metrics for the write path.\n It is broken into sections for each service on the write path,\n and organized by the order in which the write request flows.\n
\n Incoming metrics data travels from the gateway → distributor → ingester.\n
\n For each service, there are 3 panels showing\n (1) requests per second to that service,\n (2) average, median, and p99 latency of requests to that service, and\n (3) p99 latency of requests to each instance of that service.\n

\n

\n It also includes metrics for the key-value (KV) stores used to manage\n the high-availability tracker and the ingesters.\n

\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 12, - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Writes dashboard description", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "100px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_samples:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Samples / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Exemplars / sec\nThe total number of received exemplars by the distributors, excluding rejected and deduped exemplars, 
but not necessarily ingested by the ingesters.\n\n", - "fill": 1, - "format": "short", - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Exemplars / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### In-memory series\nThe number of series not yet flushed to object storage that are held in ingester memory.\n\n", - "fill": 1, - "format": "short", - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - 
"steppedLine": false, - "targets": [ - { - "expr": "sum(cortex_ingester_memory_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n/ on(cluster, namespace) group_left\nmax by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}))\n", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "In-memory series", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Exemplars in ingesters\nNumber of TSDB exemplars currently in ingesters' storage.\n\n", - "fill": 1, - "format": "short", - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cortex_ingester_tsdb_exemplar_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n/ on(cluster, namespace) group_left\nmax by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}))\n", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Exemplars in ingesters", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "count(count by(user) (cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Tenants", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 
null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Requests / sec\nThe rate of successful, failed and rejected requests to distributor.\nRejected requests are requests that distributor fails to handle because of distributor instance limits.\nWhen distributor is configured to use \"early\" request rejection, then rejected requests are NOT included in other metrics.\nWhen distributor is not configured to use \"early\" request rejection, then rejected requests are also counted as \"errors\".\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": 
"byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 
4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": 
"desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Requests / sec\nThe rate of successful, failed and rejected requests to ingester.\nRejected requests are requests that ingester fails to handle because of ingester instance limits (ingester-max-inflight-push-requests, ingester-max-inflight-push-requests-bytes, ingester-max-ingestion-rate).\nWhen ingester is configured to use \"early\" request rejection, then rejected requests are NOT included in other metrics.\nWhen ingester is not configured to use \"early\" request rejection, then rejected requests are also counted as \"errors\".\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } 
- } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",route=\"/cortex.Ingester/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - 
"defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - 
} - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } 
- } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 14, - 
"links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor - key-value store for high-availability (HA) deduplication", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - 
"drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": 
"single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor - key-value store for distributors ring", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - 
"mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 18, - 
"links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester - key-value store for the ingesters ring", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Uploaded blocks / sec\nThe rate of blocks being uploaded from the ingesters\nto object storage.\n\n", - 
"fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_shipper_uploads_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) - sum(rate(cortex_ingester_shipper_upload_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_ingester_shipper_upload_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Uploaded blocks / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Upload latency\nThe average, median (50th percentile), and 99th percentile time\nthe ingesters take to upload blocks to object storage.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - 
"spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Upload latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - 
"repeatRowId": null, - "showTitle": true, - "title": "Ingester - shipper", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Compactions per second\nIngesters maintain a local TSDB per-tenant on disk. Each TSDB maintains a head block for each\nactive time series; these blocks get periodically compacted (by default, every 2h).\nThis panel shows the rate of compaction operations across all TSDBs on all ingesters.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_tsdb_compactions_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_ingester_tsdb_compactions_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Compactions / sec", - "type": "timeseries" - }, - { - 
"datasource": "$datasource", - "description": "### Compaction latency\nThe average, median (50th percentile), and 99th percentile time ingesters take to compact TSDB head blocks\non the local filesystem.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Compactions latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - 
"show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester - TSDB head", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### WAL truncations per second\nThe WAL is truncated each time a new TSDB block is written. This panel measures the rate of\ntruncations.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_tsdb_wal_truncations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) - sum(rate(cortex_ingester_tsdb_wal_truncations_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_ingester_tsdb_wal_truncations_failed_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "WAL truncations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Checkpoints created per second\nCheckpoints are created as part of the WAL truncation process.\nThis metric measures the rate of checkpoint creation.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_tsdb_checkpoint_creations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) - sum(rate(cortex_ingester_tsdb_checkpoint_creations_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_ingester_tsdb_checkpoint_creations_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - 
"legendFormat": "failed", - "legendLink": null - } - ], - "title": "Checkpoints created / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### WAL truncations latency (including checkpointing)\nAverage time taken to perform a full WAL truncation,\nincluding the time taken for the checkpointing to complete.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_tsdb_wal_truncate_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\n/\nsum(rate(cortex_ingester_tsdb_wal_truncate_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) >= 0\n", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - } - ], - "title": "WAL truncations latency (includes checkpointing)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "WAL" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } 
- } - ] - }, - { - "matcher": { - "id": "byName", - "options": "mmap-ed chunks" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E28A42", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 26, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_tsdb_wal_corruptions_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "WAL", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_ingester_tsdb_mmap_chunk_corruptions_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "mmap-ed chunks", - "legendLink": null - } - ], - "title": "Corruptions / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester - TSDB write ahead log (WAL)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Distributor exemplars incoming rate\nThe rate of exemplars that have come in to the distributor, including rejected or deduped exemplars.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ex/s" - }, - "overrides": [ ] - }, - "id": 27, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": 
"sum(cluster_namespace_job:cortex_distributor_exemplars_in:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "incoming exemplars", - "legendLink": null - } - ], - "title": "Distributor exemplars incoming rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor exemplars received rate\nThe rate of received exemplars, excluding rejected and deduped exemplars.\nThis number can be sensibly lower than incoming rate because we dedupe the HA sent exemplars, and then reject based on time, see `cortex_discarded_exemplars_total` for specific reasons rates.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ex/s" - }, - "overrides": [ ] - }, - "id": 28, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "received exemplars", - "legendLink": null - } - ], - "title": "Distributor exemplars received rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Ingester ingested exemplars rate\nThe rate of exemplars ingested in the ingesters.\nEvery exemplar is sent to the replication factor number of ingesters, so the sum of rates from all ingesters is divided by the replication factor.\nThis ingested exemplars rate should match the distributor's received exemplars rate.\n\n", - "fieldConfig": { - "defaults": { - 
"custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ex/s" - }, - "overrides": [ ] - }, - "id": 29, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n cluster_namespace_job:cortex_ingester_ingested_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "ingested exemplars", - "legendLink": null - } - ], - "title": "Ingester ingested exemplars rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Ingester appended exemplars rate\nThe rate of exemplars appended in the ingesters.\nThis can be lower than ingested exemplars rate since TSDB does not append the same exemplar twice, and those can be frequent.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ex/s" - }, - "overrides": [ ] - }, - "id": 30, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n cluster_namespace_job:cortex_ingester_tsdb_exemplar_exemplars_appended:rate5m{cluster=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "appended exemplars", - "legendLink": null - } - ], - "title": "Ingester appended exemplars rate", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Exemplars", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 31, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_distributor_instance_rejected_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{reason}}", - "legendLink": null - } - ], - "title": "Rejected distributor requests", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 32, - "links": [ ], - "options": { - 
"legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_ingester_instance_rejected_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{reason}}", - "legendLink": null - } - ], - "title": "Rejected ingester requests", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Instance Limits", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - 
"10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Writes", - "uid": "8280707b8f16e7b87b840fc1cc92d4c5", - "version": 0 - } -kind: ConfigMap +kind: Service metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir labels: - grafana_dashboard: "1" - name: mimir-writes.json + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent-cluster namespace: monitoring-system ---- -apiVersion: v1 -data: - MIMIR_S3_SECRET_ACCESS_KEY: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= -kind: Secret -metadata: - labels: - app.kubernetes.io/component: mimir - app.kubernetes.io/instance: mimir-monolithic-mode - app.kubernetes.io/managed-by: Kustomize - app.kubernetes.io/name: mimir - app.kubernetes.io/version: 2.11.0 - name: mimir-env-92ddctt858 - namespace: monitoring-system -type: Opaque +spec: + clusterIP: None + ports: + - name: http + port: 80 + protocol: TCP + targetPort: 80 + - name: grpc-otlp + port: 4317 + protocol: TCP + targetPort: 4317 + - name: http-otlp + port: 4318 + protocol: TCP + targetPort: 4318 + - name: zipkin + port: 9411 + protocol: TCP + targetPort: 9411 + - name: jaeger-compact + port: 6831 + protocol: UDP + targetPort: 6831 + selector: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + type: ClusterIP --- apiVersion: v1 kind: Service @@ -43673,1595 +1047,148 @@ spec: - emptyDir: {} name: storage --- -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: agent-flow-mixin-alerts - namespace: monitoring-system -spec: - groups: - - name: clustering - rules: - - alert: ClusterNotConverging - annotations: - message: Cluster is not converging. 
- expr: stddev by (cluster, namespace) (sum without (state) (cluster_node_peers)) - != 0 - for: 5m - - alert: ClusterSplitBrain - annotations: - message: Cluster nodes have entered a split brain state. - expr: | - sum without (state) (cluster_node_peers) != - on (cluster, namespace) group_left - count by (cluster, namespace) (cluster_node_info) - for: 5m - - alert: ClusterLamportClockDrift - annotations: - message: Cluster nodes' lamport clocks are not converging. - expr: stddev by (cluster, namespace) (cluster_node_lamport_time) > 4 * sqrt(count - by (cluster, namespace) (cluster_node_info)) - for: 5m - - alert: ClusterNodeUnhealthy - annotations: - message: Cluster node is reporting a health score > 0. - expr: | - cluster_node_gossip_health_score > 0 - for: 5m - - alert: ClusterLamportClockStuck - annotations: - message: Cluster nodes's lamport clocks is not progressing. - expr: | - sum by (cluster, namespace, instance) (rate(cluster_node_lamport_time[2m])) == 0 - and on (cluster, namespace, instance) (cluster_node_peers > 1) - for: 5m - - alert: ClusterNodeNameConflict - annotations: - message: A node tried to join the cluster with a name conflicting with an - existing peer. - expr: sum by (cluster, namespace) (rate(cluster_node_gossip_received_events_total{event="node_conflict"}[2m])) - > 0 - for: 10m - - alert: ClusterNodeStuckTerminating - annotations: - message: Cluster node stuck in Terminating state. - expr: sum by (cluster, namespace, instance) (cluster_node_peers{state="terminating"}) - > 0 - for: 5m - - alert: ClusterConfigurationDrift - annotations: - message: Cluster nodes are not using the same configuration file. - expr: | - count without (sha256) ( - max by (cluster, namespace, sha256) (agent_config_hash and on(cluster, namespace) cluster_node_info) - ) > 1 - for: 5m - - name: agent_controller - rules: - - alert: SlowComponentEvaluations - annotations: - message: Flow component evaluations are taking too long. 
- expr: sum by (cluster, namespace, component_id) (rate(agent_component_evaluation_slow_seconds[10m])) - > 0 - for: 15m - - alert: UnhealthyComponents - annotations: - message: Unhealthy Flow components detected. - expr: sum(agent_component_controller_running_components{health_type!="healthy"}) - > 0 - for: 15m ---- -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule +apiVersion: apps/v1 +kind: DaemonSet metadata: - name: mimir-mixin-alerts + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent namespace: monitoring-system spec: - groups: - - name: mimir_alerts - rules: - - alert: MimirIngesterUnhealthy - annotations: - message: Mimir cluster {{ $labels.cluster }}/{{ $labels.namespace }} has {{ - printf "%f" $value }} unhealthy ingester(s). - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterunhealthy - expr: | - min by (cluster, namespace) (cortex_ring_members{state="Unhealthy", name="ingester"}) > 0 - for: 15m - labels: - severity: critical - - alert: MimirRequestErrors - annotations: - message: | - The route {{ $labels.route }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% errors. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrequesterrors - expr: | - 100 * sum by (cluster, namespace, job, route) (rate(cortex_request_duration_seconds_count{status_code=~"5..",route!~"ready|debug_pprof"}[1m])) - / - sum by (cluster, namespace, job, route) (rate(cortex_request_duration_seconds_count{route!~"ready|debug_pprof"}[1m])) - > 1 - for: 15m - labels: - severity: critical - - alert: MimirRequestLatency - annotations: - message: | - {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrequestlatency - expr: | - cluster_namespace_job_route:cortex_request_duration_seconds:99quantile{route!~"metrics|/frontend.Frontend/Process|ready|/schedulerpb.SchedulerForFrontend/FrontendLoop|/schedulerpb.SchedulerForQuerier/QuerierLoop|debug_pprof"} - > - 2.5 - for: 15m - labels: - severity: warning - - alert: MimirQueriesIncorrect - annotations: - message: | - The Mimir cluster {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% incorrect query results. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirqueriesincorrect - expr: | - 100 * sum by (cluster, namespace) (rate(test_exporter_test_case_result_total{result="fail"}[5m])) - / - sum by (cluster, namespace) (rate(test_exporter_test_case_result_total[5m])) > 1 - for: 15m - labels: - severity: warning - - alert: MimirInconsistentRuntimeConfig - annotations: - message: | - An inconsistent runtime config file is used across cluster {{ $labels.cluster }}/{{ $labels.namespace }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirinconsistentruntimeconfig - expr: | - count(count by(cluster, namespace, job, sha256) (cortex_runtime_config_hash)) without(sha256) > 1 - for: 1h - labels: - severity: critical - - alert: MimirBadRuntimeConfig - annotations: - message: | - {{ $labels.job }} failed to reload runtime config. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirbadruntimeconfig - expr: | - # The metric value is reset to 0 on error while reloading the config at runtime. - cortex_runtime_config_last_reload_successful == 0 - for: 5m - labels: - severity: critical - - alert: MimirFrontendQueriesStuck - annotations: - message: | - There are {{ $value }} queued up queries in {{ $labels.cluster }}/{{ $labels.namespace }} {{ $labels.job }}. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirfrontendqueriesstuck - expr: | - sum by (cluster, namespace, job) (min_over_time(cortex_query_frontend_queue_length[1m])) > 0 - for: 5m - labels: - severity: critical - - alert: MimirSchedulerQueriesStuck - annotations: - message: | - There are {{ $value }} queued up queries in {{ $labels.cluster }}/{{ $labels.namespace }} {{ $labels.job }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirschedulerqueriesstuck - expr: | - sum by (cluster, namespace, job) (min_over_time(cortex_query_scheduler_queue_length[1m])) > 0 - for: 7m - labels: - severity: critical - - alert: MimirCacheRequestErrors - annotations: - message: | - The cache {{ $labels.name }} used by Mimir {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% errors for {{ $labels.operation }} operation. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircacherequesterrors - expr: | - ( - sum by(cluster, namespace, name, operation) ( - rate(thanos_memcached_operation_failures_total[1m]) - or - rate(thanos_cache_operation_failures_total[1m]) - ) - / - sum by(cluster, namespace, name, operation) ( - rate(thanos_memcached_operations_total[1m]) - or - rate(thanos_cache_operations_total[1m]) - ) - ) * 100 > 5 - for: 5m - labels: - severity: warning - - alert: MimirIngesterRestarts - annotations: - message: Mimir {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has restarted {{ printf "%.2f" $value }} times in the last 30 mins. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterrestarts - expr: | - ( - sum by(cluster, namespace, pod) ( - increase(kube_pod_container_status_restarts_total{container=~"(ingester|mimir-write)"}[30m]) - ) - >= 2 - ) - and - ( - count by(cluster, namespace, pod) (cortex_build_info) > 0 - ) - labels: - severity: warning - - alert: MimirKVStoreFailure - annotations: - message: | - Mimir {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is failing to talk to the KV store {{ $labels.kv_name }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirkvstorefailure - expr: | - ( - sum by(cluster, namespace, pod, status_code, kv_name) (rate(cortex_kv_request_duration_seconds_count{status_code!~"2.+"}[1m])) - / - sum by(cluster, namespace, pod, status_code, kv_name) (rate(cortex_kv_request_duration_seconds_count[1m])) - ) - # We want to get alerted only in case there's a constant failure. - == 1 - for: 5m - labels: - severity: critical - - alert: MimirMemoryMapAreasTooHigh - annotations: - message: '{{ $labels.job }}/{{ $labels.pod }} has a number of mmap-ed areas - close to the limit.' - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirmemorymapareastoohigh - expr: | - process_memory_map_areas{job=~".*/(ingester.*|cortex|mimir|mimir-write.*|store-gateway.*|cortex|mimir|mimir-backend.*)"} / process_memory_map_areas_limit{job=~".*/(ingester.*|cortex|mimir|mimir-write.*|store-gateway.*|cortex|mimir|mimir-backend.*)"} > 0.8 - for: 5m - labels: - severity: critical - - alert: MimirIngesterInstanceHasNoTenants - annotations: - message: Mimir ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has no tenants assigned. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterinstancehasnotenants - expr: | - (min by(cluster, namespace, pod) (cortex_ingester_memory_users) == 0) - and on (cluster, namespace) - # Only if there are more time-series than would be expected due to continuous testing load - ( - sum by(cluster, namespace) (cortex_ingester_memory_series) - / - max by(cluster, namespace) (cortex_distributor_replication_factor) - ) > 100000 - for: 1h - labels: - severity: warning - - alert: MimirRulerInstanceHasNoRuleGroups - annotations: - message: Mimir ruler {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has no rule groups assigned. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulerinstancehasnorulegroups - expr: | - # Alert on ruler instances in microservices mode that have no rule groups assigned, - min by(cluster, namespace, pod) (cortex_ruler_managers_total{pod=~"(.*mimir-)?ruler.*"}) == 0 - # but only if other ruler instances of the same cell do have rule groups assigned - and on (cluster, namespace) - (max by(cluster, namespace) (cortex_ruler_managers_total) > 0) - # and there are more than two instances overall - and on (cluster, namespace) - (count by (cluster, namespace) (cortex_ruler_managers_total) > 2) - for: 1h - labels: - severity: warning - - alert: MimirIngestedDataTooFarInTheFuture - annotations: - message: Mimir ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has ingested samples with timestamps more than 1h in the future. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesteddatatoofarinthefuture - expr: | - max by(cluster, namespace, pod) ( - cortex_ingester_tsdb_head_max_timestamp_seconds - time() - and - cortex_ingester_tsdb_head_max_timestamp_seconds > 0 - ) > 60*60 - for: 5m - labels: - severity: warning - - alert: MimirRingMembersMismatch - annotations: - message: | - Number of members in Mimir ingester hash ring does not match the expected number in {{ $labels.cluster }}/{{ $labels.namespace }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirringmembersmismatch - expr: | - ( - avg by(cluster, namespace) (sum by(cluster, namespace, pod) (cortex_ring_members{name="ingester",job=~".*/(ingester.*|cortex|mimir|mimir-write.*)"})) - != sum by(cluster, namespace) (up{job=~".*/(ingester.*|cortex|mimir|mimir-write.*)"}) - ) - and - ( - count by(cluster, namespace) (cortex_build_info) > 0 - ) - for: 15m - labels: - component: ingester - severity: warning - - name: mimir_instance_limits_alerts - rules: - - alert: MimirIngesterReachingSeriesLimit - annotations: - message: | - Ingester {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its series limit. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterreachingserieslimit - expr: | - ( - (cortex_ingester_memory_series / ignoring(limit) cortex_ingester_instance_limits{limit="max_series"}) - and ignoring (limit) - (cortex_ingester_instance_limits{limit="max_series"} > 0) - ) > 0.8 - for: 3h - labels: - severity: warning - - alert: MimirIngesterReachingSeriesLimit - annotations: - message: | - Ingester {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its series limit. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterreachingserieslimit - expr: | - ( - (cortex_ingester_memory_series / ignoring(limit) cortex_ingester_instance_limits{limit="max_series"}) - and ignoring (limit) - (cortex_ingester_instance_limits{limit="max_series"} > 0) - ) > 0.9 - for: 5m - labels: - severity: critical - - alert: MimirIngesterReachingTenantsLimit - annotations: - message: | - Ingester {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its tenant limit. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterreachingtenantslimit - expr: | - ( - (cortex_ingester_memory_users / ignoring(limit) cortex_ingester_instance_limits{limit="max_tenants"}) - and ignoring (limit) - (cortex_ingester_instance_limits{limit="max_tenants"} > 0) - ) > 0.7 - for: 5m - labels: - severity: warning - - alert: MimirIngesterReachingTenantsLimit - annotations: - message: | - Ingester {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its tenant limit. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterreachingtenantslimit - expr: | - ( - (cortex_ingester_memory_users / ignoring(limit) cortex_ingester_instance_limits{limit="max_tenants"}) - and ignoring (limit) - (cortex_ingester_instance_limits{limit="max_tenants"} > 0) - ) > 0.8 - for: 5m - labels: - severity: critical - - alert: MimirReachingTCPConnectionsLimit - annotations: - message: | - Mimir instance {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its TCP connections limit for {{ $labels.protocol }} protocol. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirreachingtcpconnectionslimit - expr: | - cortex_tcp_connections / cortex_tcp_connections_limit > 0.8 and - cortex_tcp_connections_limit > 0 - for: 5m - labels: - severity: critical - - alert: MimirDistributorReachingInflightPushRequestLimit - annotations: - message: | - Distributor {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its inflight push request limit. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirdistributorreachinginflightpushrequestlimit - expr: | - ( - (cortex_distributor_inflight_push_requests / ignoring(limit) cortex_distributor_instance_limits{limit="max_inflight_push_requests"}) - and ignoring (limit) - (cortex_distributor_instance_limits{limit="max_inflight_push_requests"} > 0) - ) > 0.8 - for: 5m - labels: - severity: critical - - name: mimir-rollout-alerts - rules: - - alert: MimirRolloutStuck - annotations: - message: | - The {{ $labels.rollout_group }} rollout is stuck in {{ $labels.cluster }}/{{ $labels.namespace }}. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrolloutstuck - expr: | - ( - max without (revision) ( - sum without(statefulset) (label_replace(kube_statefulset_status_current_revision, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?")) - unless - sum without(statefulset) (label_replace(kube_statefulset_status_update_revision, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?")) - ) - * - ( - sum without(statefulset) (label_replace(kube_statefulset_replicas, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?")) - != - sum without(statefulset) (label_replace(kube_statefulset_status_replicas_updated, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?")) - ) - ) and ( - changes(sum without(statefulset) (label_replace(kube_statefulset_status_replicas_updated, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?"))[15m:1m]) - == - 0 - ) - * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - for: 30m - labels: - severity: warning - workload_type: statefulset - - alert: MimirRolloutStuck - annotations: - message: | - The {{ $labels.rollout_group }} rollout is stuck in {{ $labels.cluster }}/{{ $labels.namespace }}. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrolloutstuck - expr: | - ( - sum without(deployment) (label_replace(kube_deployment_spec_replicas, "rollout_group", "$1", "deployment", "(.*?)(?:-zone-[a-z])?")) - != - sum without(deployment) (label_replace(kube_deployment_status_replicas_updated, "rollout_group", "$1", "deployment", "(.*?)(?:-zone-[a-z])?")) - ) and ( - changes(sum without(deployment) (label_replace(kube_deployment_status_replicas_updated, "rollout_group", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"))[15m:1m]) - == - 0 - ) - * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - for: 30m - labels: - severity: warning - workload_type: deployment - - alert: RolloutOperatorNotReconciling - annotations: - message: | - Rollout operator is not reconciling the rollout group {{ $labels.rollout_group }} in {{ $labels.cluster }}/{{ $labels.namespace }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#rolloutoperatornotreconciling - expr: | - max by(cluster, namespace, rollout_group) (time() - rollout_operator_last_successful_group_reconcile_timestamp_seconds) > 600 - for: 5m - labels: - severity: critical - - name: mimir-provisioning - rules: - - alert: MimirAllocatingTooMuchMemory - annotations: - message: | - Instance {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is using too much memory. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirallocatingtoomuchmemory - expr: | - ( - # We use RSS instead of working set memory because of the ingester's extensive usage of mmap. - # See: https://github.com/grafana/mimir/issues/2466 - container_memory_rss{container=~"(ingester|mimir-write|mimir-backend)"} - / - ( container_spec_memory_limit_bytes{container=~"(ingester|mimir-write|mimir-backend)"} > 0 ) - ) - # Match only Mimir namespaces. 
- * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - > 0.65 - for: 15m - labels: - severity: warning - - alert: MimirAllocatingTooMuchMemory - annotations: - message: | - Instance {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is using too much memory. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirallocatingtoomuchmemory - expr: | - ( - # We use RSS instead of working set memory because of the ingester's extensive usage of mmap. - # See: https://github.com/grafana/mimir/issues/2466 - container_memory_rss{container=~"(ingester|mimir-write|mimir-backend)"} - / - ( container_spec_memory_limit_bytes{container=~"(ingester|mimir-write|mimir-backend)"} > 0 ) - ) - # Match only Mimir namespaces. - * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - > 0.8 - for: 15m - labels: - severity: critical - - name: ruler_alerts - rules: - - alert: MimirRulerTooManyFailedPushes - annotations: - message: | - Mimir Ruler {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% write (push) errors. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulertoomanyfailedpushes - expr: | - 100 * ( - sum by (cluster, namespace, pod) (rate(cortex_ruler_write_requests_failed_total[1m])) - / - sum by (cluster, namespace, pod) (rate(cortex_ruler_write_requests_total[1m])) - ) > 1 - for: 5m - labels: - severity: critical - - alert: MimirRulerTooManyFailedQueries - annotations: - message: | - Mimir Ruler {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% errors while evaluating rules. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulertoomanyfailedqueries - expr: | - 100 * ( - sum by (cluster, namespace, pod) (rate(cortex_ruler_queries_failed_total[1m])) - / - sum by (cluster, namespace, pod) (rate(cortex_ruler_queries_total[1m])) - ) > 1 - for: 5m - labels: - severity: critical - - alert: MimirRulerMissedEvaluations - annotations: - message: | - Mimir Ruler {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% missed iterations for the rule group {{ $labels.rule_group }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulermissedevaluations - expr: | - 100 * ( - sum by (cluster, namespace, pod, rule_group) (rate(cortex_prometheus_rule_group_iterations_missed_total[1m])) - / - sum by (cluster, namespace, pod, rule_group) (rate(cortex_prometheus_rule_group_iterations_total[1m])) - ) > 1 - for: 5m - labels: - severity: warning - - alert: MimirRulerFailedRingCheck - annotations: - message: | - Mimir Rulers in {{ $labels.cluster }}/{{ $labels.namespace }} are experiencing errors when checking the ring for rule group ownership. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulerfailedringcheck - expr: | - sum by (cluster, namespace, job) (rate(cortex_ruler_ring_check_errors_total[1m])) - > 0 - for: 5m - labels: - severity: critical - - alert: MimirRulerRemoteEvaluationFailing - annotations: - message: | - Mimir rulers in {{ $labels.cluster }}/{{ $labels.namespace }} are failing to perform {{ printf "%.2f" $value }}% of remote evaluations through the ruler-query-frontend. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulerremoteevaluationfailing - expr: | - 100 * ( - sum by (cluster, namespace) (rate(cortex_request_duration_seconds_count{route="/httpgrpc.HTTP/Handle", status_code=~"5..", job=~".*/(ruler-query-frontend.*)"}[5m])) - / - sum by (cluster, namespace) (rate(cortex_request_duration_seconds_count{route="/httpgrpc.HTTP/Handle", job=~".*/(ruler-query-frontend.*)"}[5m])) - ) > 1 - for: 5m - labels: - severity: warning - - name: gossip_alerts - rules: - - alert: MimirGossipMembersTooHigh - annotations: - message: One or more Mimir instances in {{ $labels.cluster }}/{{ $labels.namespace - }} consistently sees a higher than expected number of gossip members. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirgossipmemberstoohigh - expr: | - max by (cluster, namespace) (memberlist_client_cluster_members_count) - > - (sum by (cluster, namespace) (up{job=~".+/(admin-api|alertmanager|compactor.*|distributor|ingester.*|querier.*|ruler|ruler-querier.*|store-gateway.*|cortex|mimir|mimir-write.*|mimir-read.*|mimir-backend.*)"}) + 10) - for: 20m - labels: - severity: warning - - alert: MimirGossipMembersTooLow - annotations: - message: One or more Mimir instances in {{ $labels.cluster }}/{{ $labels.namespace - }} consistently sees a lower than expected number of gossip members. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirgossipmemberstoolow - expr: | - min by (cluster, namespace) (memberlist_client_cluster_members_count) - < - (sum by (cluster, namespace) (up{job=~".+/(admin-api|alertmanager|compactor.*|distributor|ingester.*|querier.*|ruler|ruler-querier.*|store-gateway.*|cortex|mimir|mimir-write.*|mimir-read.*|mimir-backend.*)"}) * 0.5) - for: 20m - labels: - severity: warning - - name: etcd_alerts - rules: - - alert: EtcdAllocatingTooMuchMemory - annotations: - message: | - Too much memory being used by {{ $labels.namespace }}/{{ $labels.pod }} - bump memory limit. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#etcdallocatingtoomuchmemory - expr: | - ( - container_memory_working_set_bytes{container="etcd"} - / - ( container_spec_memory_limit_bytes{container="etcd"} > 0 ) - ) > 0.65 - for: 15m - labels: - severity: warning - - alert: EtcdAllocatingTooMuchMemory - annotations: - message: | - Too much memory being used by {{ $labels.namespace }}/{{ $labels.pod }} - bump memory limit. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#etcdallocatingtoomuchmemory - expr: | - ( - container_memory_working_set_bytes{container="etcd"} - / - ( container_spec_memory_limit_bytes{container="etcd"} > 0 ) - ) > 0.8 - for: 15m - labels: - severity: critical - - name: alertmanager_alerts - rules: - - alert: MimirAlertmanagerSyncConfigsFailing - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is failing to read tenant configurations from storage. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagersyncconfigsfailing - expr: | - rate(cortex_alertmanager_sync_configs_failed_total[5m]) > 0 - for: 30m - labels: - severity: critical - - alert: MimirAlertmanagerRingCheckFailing - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is unable to check tenants ownership via the ring. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerringcheckfailing - expr: | - rate(cortex_alertmanager_ring_check_errors_total[2m]) > 0 - for: 10m - labels: - severity: critical - - alert: MimirAlertmanagerPartialStateMergeFailing - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is failing to merge partial state changes received from a replica. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerpartialstatemergefailing - expr: | - rate(cortex_alertmanager_partial_state_merges_failed_total[2m]) > 0 - for: 10m - labels: - severity: critical - - alert: MimirAlertmanagerReplicationFailing - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is failing to replicating partial state to its replicas. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerreplicationfailing - expr: | - rate(cortex_alertmanager_state_replication_failed_total[2m]) > 0 - for: 10m - labels: - severity: critical - - alert: MimirAlertmanagerPersistStateFailing - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is unable to persist full state snaphots to remote storage. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerpersiststatefailing - expr: | - rate(cortex_alertmanager_state_persist_failed_total[15m]) > 0 - for: 1h - labels: - severity: critical - - alert: MimirAlertmanagerInitialSyncFailed - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} was unable to obtain some initial state when starting up. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerinitialsyncfailed - expr: | - increase(cortex_alertmanager_state_initial_sync_completed_total{outcome="failed"}[1m]) > 0 - labels: - severity: critical - - alert: MimirAlertmanagerAllocatingTooMuchMemory - annotations: - message: | - Alertmanager {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is using too much memory. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerallocatingtoomuchmemory - expr: | - (container_memory_working_set_bytes{container="alertmanager"} / container_spec_memory_limit_bytes{container="alertmanager"}) > 0.80 - and - (container_spec_memory_limit_bytes{container="alertmanager"} > 0) - for: 15m - labels: - severity: warning - - alert: MimirAlertmanagerAllocatingTooMuchMemory - annotations: - message: | - Alertmanager {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is using too much memory. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerallocatingtoomuchmemory - expr: | - (container_memory_working_set_bytes{container="alertmanager"} / container_spec_memory_limit_bytes{container="alertmanager"}) > 0.90 - and - (container_spec_memory_limit_bytes{container="alertmanager"} > 0) - for: 15m - labels: - severity: critical - - alert: MimirAlertmanagerInstanceHasNoTenants - annotations: - message: Mimir alertmanager {{ $labels.pod }} in {{ $labels.cluster }}/{{ - $labels.namespace }} owns no tenants. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerinstancehasnotenants - expr: | - # Alert on alertmanager instances in microservices mode that own no tenants, - min by(cluster, namespace, pod) (cortex_alertmanager_tenants_owned{pod=~"(.*mimir-)?alertmanager.*"}) == 0 - # but only if other instances of the same cell do have tenants assigned. - and on (cluster, namespace) - max by(cluster, namespace) (cortex_alertmanager_tenants_owned) > 0 - for: 1h - labels: - severity: warning - - name: mimir_blocks_alerts - rules: - - alert: MimirIngesterHasNotShippedBlocks - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not shipped any block in the last 4 hours. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterhasnotshippedblocks - expr: | - (min by(cluster, namespace, pod) (time() - cortex_ingester_shipper_last_successful_upload_timestamp_seconds) > 60 * 60 * 4) - and - (max by(cluster, namespace, pod) (cortex_ingester_shipper_last_successful_upload_timestamp_seconds) > 0) - and - # Only if the ingester has ingested samples over the last 4h. - (max by(cluster, namespace, pod) (max_over_time(cluster_namespace_pod:cortex_ingester_ingested_samples_total:rate1m[4h])) > 0) - and - # Only if the ingester was ingesting samples 4h ago. 
This protects against the case where the ingester replica - # had ingested samples in the past, then no traffic was received for a long period and then it starts - # receiving samples again. Without this check, the alert would fire as soon as it gets back receiving - # samples, while the a block shipping is expected within the next 4h. - (max by(cluster, namespace, pod) (max_over_time(cluster_namespace_pod:cortex_ingester_ingested_samples_total:rate1m[1h] offset 4h)) > 0) - for: 15m - labels: - severity: critical - - alert: MimirIngesterHasNotShippedBlocksSinceStart - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not shipped any block in the last 4 hours. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterhasnotshippedblockssincestart - expr: | - (max by(cluster, namespace, pod) (cortex_ingester_shipper_last_successful_upload_timestamp_seconds) == 0) - and - (max by(cluster, namespace, pod) (max_over_time(cluster_namespace_pod:cortex_ingester_ingested_samples_total:rate1m[4h])) > 0) - for: 4h - labels: - severity: critical - - alert: MimirIngesterHasUnshippedBlocks - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has compacted a block {{ $value | humanizeDuration }} ago but it hasn't - been successfully uploaded to the storage yet. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterhasunshippedblocks - expr: | - (time() - cortex_ingester_oldest_unshipped_block_timestamp_seconds > 3600) - and - (cortex_ingester_oldest_unshipped_block_timestamp_seconds > 0) - for: 15m - labels: - severity: critical - - alert: MimirIngesterTSDBHeadCompactionFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to compact TSDB head. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbheadcompactionfailed - expr: | - rate(cortex_ingester_tsdb_compactions_failed_total[5m]) > 0 - for: 15m - labels: - severity: critical - - alert: MimirIngesterTSDBHeadTruncationFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to truncate TSDB head. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbheadtruncationfailed - expr: | - rate(cortex_ingester_tsdb_head_truncations_failed_total[5m]) > 0 - labels: - severity: critical - - alert: MimirIngesterTSDBCheckpointCreationFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to create TSDB checkpoint. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbcheckpointcreationfailed - expr: | - rate(cortex_ingester_tsdb_checkpoint_creations_failed_total[5m]) > 0 - labels: - severity: critical - - alert: MimirIngesterTSDBCheckpointDeletionFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to delete TSDB checkpoint. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbcheckpointdeletionfailed - expr: | - rate(cortex_ingester_tsdb_checkpoint_deletions_failed_total[5m]) > 0 - labels: - severity: critical - - alert: MimirIngesterTSDBWALTruncationFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to truncate TSDB WAL. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbwaltruncationfailed - expr: | - rate(cortex_ingester_tsdb_wal_truncations_failed_total[5m]) > 0 - labels: - severity: warning - - alert: MimirIngesterTSDBWALCorrupted - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} got a corrupted TSDB WAL. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbwalcorrupted - expr: | - # alert when there are more than one corruptions - count by (cluster, namespace) (rate(cortex_ingester_tsdb_wal_corruptions_total[5m]) > 0) > 1 - and - # and there is only one zone - count by (cluster, namespace) (group by (cluster, namespace, job) (cortex_ingester_tsdb_wal_corruptions_total)) == 1 - labels: - deployment: single-zone - severity: critical - - alert: MimirIngesterTSDBWALCorrupted - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} got a corrupted TSDB WAL. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbwalcorrupted - expr: | - # alert when there are more than one corruptions - count by (cluster, namespace) (sum by (cluster, namespace, job) (rate(cortex_ingester_tsdb_wal_corruptions_total[5m]) > 0)) > 1 - and - # and there are multiple zones - count by (cluster, namespace) (group by (cluster, namespace, job) (cortex_ingester_tsdb_wal_corruptions_total)) > 1 - labels: - deployment: multi-zone - severity: critical - - alert: MimirIngesterTSDBWALWritesFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to write to TSDB WAL. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbwalwritesfailed - expr: | - rate(cortex_ingester_tsdb_wal_writes_failed_total[1m]) > 0 - for: 3m - labels: - severity: critical - - alert: MimirStoreGatewayHasNotSyncTheBucket - annotations: - message: Mimir store-gateway {{ $labels.pod }} in {{ $labels.cluster }}/{{ - $labels.namespace }} has not successfully synched the bucket since {{ $value - | humanizeDuration }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirstoregatewayhasnotsyncthebucket - expr: | - (time() - cortex_bucket_stores_blocks_last_successful_sync_timestamp_seconds{component="store-gateway"} > 60 * 30) - and - cortex_bucket_stores_blocks_last_successful_sync_timestamp_seconds{component="store-gateway"} > 0 - for: 5m - labels: - severity: critical - - alert: MimirStoreGatewayNoSyncedTenants - annotations: - message: Mimir store-gateway {{ $labels.pod }} in {{ $labels.cluster }}/{{ - $labels.namespace }} is not syncing any blocks for any tenant. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirstoregatewaynosyncedtenants - expr: | - min by(cluster, namespace, pod) (cortex_bucket_stores_tenants_synced{component="store-gateway"}) == 0 - for: 1h - labels: - severity: warning - - alert: MimirBucketIndexNotUpdated - annotations: - message: Mimir bucket index for tenant {{ $labels.user }} in {{ $labels.cluster - }}/{{ $labels.namespace }} has not been updated since {{ $value | humanizeDuration - }}. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirbucketindexnotupdated - expr: | - min by(cluster, namespace, user) (time() - cortex_bucket_index_last_successful_update_timestamp_seconds) > 7200 - labels: - severity: critical - - name: mimir_compactor_alerts - rules: - - alert: MimirCompactorHasNotSuccessfullyCleanedUpBlocks - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not successfully cleaned up blocks in the last 6 hours. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotsuccessfullycleanedupblocks - expr: | - # The "last successful run" metric is updated even if the compactor owns no tenants, - # so this alert correctly doesn't fire if compactor has nothing to do. - (time() - cortex_compactor_block_cleanup_last_successful_run_timestamp_seconds > 60 * 60 * 6) - for: 1h - labels: - severity: critical - - alert: MimirCompactorHasNotSuccessfullyRunCompaction - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not run compaction in the last 24 hours. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotsuccessfullyruncompaction - expr: | - # The "last successful run" metric is updated even if the compactor owns no tenants, - # so this alert correctly doesn't fire if compactor has nothing to do. - (time() - cortex_compactor_last_successful_run_timestamp_seconds > 60 * 60 * 24) - and - (cortex_compactor_last_successful_run_timestamp_seconds > 0) - for: 1h - labels: - reason: in-last-24h - severity: critical - - alert: MimirCompactorHasNotSuccessfullyRunCompaction - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not run compaction in the last 24 hours. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotsuccessfullyruncompaction - expr: | - # The "last successful run" metric is updated even if the compactor owns no tenants, - # so this alert correctly doesn't fire if compactor has nothing to do. - cortex_compactor_last_successful_run_timestamp_seconds == 0 - for: 24h - labels: - reason: since-startup - severity: critical - - alert: MimirCompactorHasNotSuccessfullyRunCompaction - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} failed to run 2 consecutive compactions. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotsuccessfullyruncompaction - expr: | - increase(cortex_compactor_runs_failed_total{reason!="shutdown"}[2h]) >= 2 - labels: - reason: consecutive-failures - severity: critical - - alert: MimirCompactorHasNotUploadedBlocks - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not uploaded any block in the last 24 hours. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotuploadedblocks - expr: | - (time() - (max by(cluster, namespace, pod) (thanos_objstore_bucket_last_successful_upload_time{component="compactor"})) > 60 * 60 * 24) - and - (max by(cluster, namespace, pod) (thanos_objstore_bucket_last_successful_upload_time{component="compactor"}) > 0) - and - # Only if some compactions have started. We don't want to fire this alert if the compactor has nothing to do - # (e.g. there are more replicas than required because running as part of mimir-backend). 
- (sum by(cluster, namespace, pod) (rate(cortex_compactor_group_compaction_runs_started_total[24h])) > 0) - for: 15m - labels: - severity: critical - time_period: 24h - - alert: MimirCompactorHasNotUploadedBlocks - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not uploaded any block since its start. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotuploadedblocks - expr: | - (max by(cluster, namespace, pod) (thanos_objstore_bucket_last_successful_upload_time{component="compactor"}) == 0) - and - # Only if some compactions have started. We don't want to fire this alert if the compactor has nothing to do - # (e.g. there are more replicas than required because running as part of mimir-backend). - (sum by(cluster, namespace, pod) (rate(cortex_compactor_group_compaction_runs_started_total[24h])) > 0) - for: 24h - labels: - severity: critical - time_period: since-start - - alert: MimirCompactorSkippedUnhealthyBlocks - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has found and ignored unhealthy blocks. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorskippedunhealthyblocks - expr: | - increase(cortex_compactor_blocks_marked_for_no_compaction_total[5m]) > 0 - for: 1m - labels: - severity: warning - - alert: MimirCompactorSkippedUnhealthyBlocks - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has found and ignored unhealthy blocks. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorskippedunhealthyblocks - expr: | - increase(cortex_compactor_blocks_marked_for_no_compaction_total[5m]) > 1 - for: 30m - labels: - severity: critical - - name: mimir_autoscaling - rules: - - alert: MimirAutoscalerNotActive - annotations: - message: The Horizontal Pod Autoscaler (HPA) {{ $labels.horizontalpodautoscaler - }} in {{ $labels.namespace }} is not active. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirautoscalernotactive - expr: | - ( - label_replace(( - kube_horizontalpodautoscaler_status_condition{condition="ScalingActive",status="false"} - # Match only Mimir namespaces. - * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - # Add "metric" label. - + on(cluster, namespace, horizontalpodautoscaler) group_right label_replace(kube_horizontalpodautoscaler_spec_target_metric*0, "metric", "$1", "metric_name", "(.+)") - > 0), - "scaledObject", "$1", "horizontalpodautoscaler", "keda-hpa-(.*)" - ) - ) - # Alert only if the scaling metric exists and is > 0. If the KEDA ScaledObject is configured to scale down 0, - # then HPA ScalingActive may be false when expected to run 0 replicas. In this case, the scaling metric exported - # by KEDA could not exist at all or being exposed with a value of 0. - and on (cluster, namespace, metric, scaledObject) - (label_replace(keda_scaler_metrics_value, "namespace", "$0", "exported_namespace", ".+") > 0) - for: 1h - labels: - severity: critical - - alert: MimirAutoscalerKedaFailing - annotations: - message: The Keda ScaledObject {{ $labels.scaledObject }} in {{ $labels.namespace - }} is experiencing errors. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirautoscalerkedafailing - expr: | - ( - # Find KEDA scalers reporting errors. 
- label_replace(rate(keda_scaler_errors[5m]), "namespace", "$1", "exported_namespace", "(.*)") - # Match only Mimir namespaces. - * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - ) - > 0 - for: 1h - labels: - severity: critical - - name: mimir_continuous_test - rules: - - alert: MimirContinuousTestNotRunningOnWrites - annotations: - message: Mimir continuous test {{ $labels.test }} in {{ $labels.cluster }}/{{ - $labels.namespace }} is not effectively running because writes are failing. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircontinuoustestnotrunningonwrites - expr: | - sum by(cluster, namespace, test) (rate(mimir_continuous_test_writes_failed_total[5m])) > 0 - for: 1h - labels: - severity: warning - - alert: MimirContinuousTestNotRunningOnReads - annotations: - message: Mimir continuous test {{ $labels.test }} in {{ $labels.cluster }}/{{ - $labels.namespace }} is not effectively running because queries are failing. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircontinuoustestnotrunningonreads - expr: | - sum by(cluster, namespace, test) (rate(mimir_continuous_test_queries_failed_total[5m])) > 0 - for: 1h - labels: - severity: warning - - alert: MimirContinuousTestFailed + minReadySeconds: 10 + selector: + matchLabels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + template: + metadata: annotations: - message: Mimir continuous test {{ $labels.test }} in {{ $labels.cluster }}/{{ - $labels.namespace }} failed when asserting query results. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircontinuoustestfailed - expr: | - sum by(cluster, namespace, test) (rate(mimir_continuous_test_query_result_checks_failed_total[10m])) > 0 + kubectl.kubernetes.io/default-container: grafana-agent + logs.agent.grafana.com/scrape: "true" + logs.agent.grafana.com/scrub-level: debug + profiles.grafana.com/cpu.port_name: http-metrics + profiles.grafana.com/cpu.scrape: "false" + profiles.grafana.com/goroutine.port_name: http-metrics + profiles.grafana.com/goroutine.scrape: "false" + profiles.grafana.com/memory.port_name: http-metrics + profiles.grafana.com/memory.scrape: "false" + pyroscope.io/service_name: grafana-agent labels: - severity: warning + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + spec: + containers: + - args: + - run + - /etc/agent/config.river + - --storage.path=/tmp/agent + - --server.http.listen-addr=0.0.0.0:80 + - --server.http.ui-path-prefix=/ + - --disable-reporting + - --cluster.enabled=true + - --cluster.join-addresses=grafana-agent-cluster + env: + - name: AGENT_MODE + value: flow + - name: AGENT_DEPLOY_MODE + value: helm + - name: HOSTNAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + envFrom: + - secretRef: + name: agent-env + optional: true + image: docker.io/grafana/agent:v0.40.3 + imagePullPolicy: IfNotPresent + name: grafana-agent + ports: + - containerPort: 80 + name: http-metrics + - containerPort: 4317 + name: grpc-otlp + protocol: TCP + - containerPort: 4318 + name: http-otlp + protocol: TCP + - containerPort: 9411 + name: zipkin + protocol: TCP + - containerPort: 6831 + name: jaeger-compact + protocol: UDP + readinessProbe: + httpGet: + path: /-/ready + port: 80 + scheme: HTTP + initialDelaySeconds: 10 + timeoutSeconds: 1 + volumeMounts: + - mountPath: /etc/agent + name: config + - mountPath: /var/log + name: varlog + readOnly: true + - mountPath: /etc/agent-modules + name: agent-modules + - args: + - 
--volume-dir=/etc/agent + - --webhook-url=http://localhost:80/-/reload + image: ghcr.io/jimmidyson/configmap-reload:v0.12.0 + name: config-reloader + resources: + requests: + cpu: 1m + memory: 5Mi + volumeMounts: + - mountPath: /etc/agent + name: config + dnsPolicy: ClusterFirst + nodeSelector: + kubernetes.io/os: linux + serviceAccountName: grafana-agent + volumes: + - configMap: + name: agent-config-9cc7gk9k2b + name: config + - hostPath: + path: /var/log + name: varlog + - configMap: + name: agent-modules-cf8t5bf7t9 + name: agent-modules + updateStrategy: + rollingUpdate: + maxSurge: 0 + maxUnavailable: 2 + type: RollingUpdate --- apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule +kind: ServiceMonitor metadata: - name: mimir-mixin-rules + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent namespace: monitoring-system spec: - groups: - - name: mimir_api_1 - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_request_duration_seconds:50quantile - - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job) / - sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job) - record: cluster_job:cortex_request_duration_seconds:avg - - expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, - job) - record: cluster_job:cortex_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job) - record: cluster_job:cortex_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_request_duration_seconds_count[1m])) 
by (cluster, job) - record: cluster_job:cortex_request_duration_seconds_count:sum_rate - - expr: sum(rate(cortex_request_duration_seconds[1m])) by (cluster, job) - record: cluster_job:cortex_request_duration_seconds:sum_rate - - name: mimir_api_2 - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, job, route)) - record: cluster_job_route:cortex_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, job, route)) - record: cluster_job_route:cortex_request_duration_seconds:50quantile - - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job, route) - / sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job, route) - record: cluster_job_route:cortex_request_duration_seconds:avg - - expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, - job, route) - record: cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job, route) - record: cluster_job_route:cortex_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job, - route) - record: cluster_job_route:cortex_request_duration_seconds_count:sum_rate - - expr: sum(rate(cortex_request_duration_seconds[1m])) by (cluster, job, route) - record: cluster_job_route:cortex_request_duration_seconds:sum_rate - - name: mimir_api_3 - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, namespace, job, route)) - record: cluster_namespace_job_route:cortex_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, namespace, job, route)) - record: cluster_namespace_job_route:cortex_request_duration_seconds:50quantile - - expr: 
sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, namespace, - job, route) / sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, - namespace, job, route) - record: cluster_namespace_job_route:cortex_request_duration_seconds:avg - - expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, - namespace, job, route) - record: cluster_namespace_job_route:cortex_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, namespace, - job, route) - record: cluster_namespace_job_route:cortex_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, namespace, - job, route) - record: cluster_namespace_job_route:cortex_request_duration_seconds_count:sum_rate - - expr: sum(rate(cortex_request_duration_seconds[1m])) by (cluster, namespace, - job, route) - record: cluster_namespace_job_route:cortex_request_duration_seconds:sum_rate - - name: mimir_querier_api - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_querier_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_querier_request_duration_seconds:50quantile - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - job) / sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, - job) - record: cluster_job:cortex_querier_request_duration_seconds:avg - - expr: sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, - cluster, job) - record: cluster_job:cortex_querier_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - job) - record: cluster_job:cortex_querier_request_duration_seconds_sum:sum_rate - - 
expr: sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, - job) - record: cluster_job:cortex_querier_request_duration_seconds_count:sum_rate - - expr: histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, job, route)) - record: cluster_job_route:cortex_querier_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, job, route)) - record: cluster_job_route:cortex_querier_request_duration_seconds:50quantile - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - job, route) / sum(rate(cortex_querier_request_duration_seconds_count[1m])) - by (cluster, job, route) - record: cluster_job_route:cortex_querier_request_duration_seconds:avg - - expr: sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, - cluster, job, route) - record: cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - job, route) - record: cluster_job_route:cortex_querier_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, - job, route) - record: cluster_job_route:cortex_querier_request_duration_seconds_count:sum_rate - - expr: histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, namespace, job, route)) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, namespace, job, route)) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds:50quantile - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - namespace, job, route) / 
sum(rate(cortex_querier_request_duration_seconds_count[1m])) - by (cluster, namespace, job, route) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds:avg - - expr: sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, - cluster, namespace, job, route) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - namespace, job, route) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, - namespace, job, route) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds_count:sum_rate - - name: mimir_storage - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_kv_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_kv_request_duration_seconds:50quantile - - expr: sum(rate(cortex_kv_request_duration_seconds_sum[1m])) by (cluster, job) - / sum(rate(cortex_kv_request_duration_seconds_count[1m])) by (cluster, job) - record: cluster_job:cortex_kv_request_duration_seconds:avg - - expr: sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) by (le, cluster, - job) - record: cluster_job:cortex_kv_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_kv_request_duration_seconds_sum[1m])) by (cluster, job) - record: cluster_job:cortex_kv_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_kv_request_duration_seconds_count[1m])) by (cluster, job) - record: cluster_job:cortex_kv_request_duration_seconds_count:sum_rate - - name: mimir_queries - rules: - - expr: histogram_quantile(0.99, 
sum(rate(cortex_query_frontend_retries_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_query_frontend_retries:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_query_frontend_retries_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_query_frontend_retries:50quantile - - expr: sum(rate(cortex_query_frontend_retries_sum[1m])) by (cluster, job) / sum(rate(cortex_query_frontend_retries_count[1m])) - by (cluster, job) - record: cluster_job:cortex_query_frontend_retries:avg - - expr: sum(rate(cortex_query_frontend_retries_bucket[1m])) by (le, cluster, job) - record: cluster_job:cortex_query_frontend_retries_bucket:sum_rate - - expr: sum(rate(cortex_query_frontend_retries_sum[1m])) by (cluster, job) - record: cluster_job:cortex_query_frontend_retries_sum:sum_rate - - expr: sum(rate(cortex_query_frontend_retries_count[1m])) by (cluster, job) - record: cluster_job:cortex_query_frontend_retries_count:sum_rate - - expr: histogram_quantile(0.99, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_query_frontend_queue_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_query_frontend_queue_duration_seconds:50quantile - - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_sum[1m])) by (cluster, - job) / sum(rate(cortex_query_frontend_queue_duration_seconds_count[1m])) by - (cluster, job) - record: cluster_job:cortex_query_frontend_queue_duration_seconds:avg - - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) by - (le, cluster, job) - record: cluster_job:cortex_query_frontend_queue_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_sum[1m])) by (cluster, - job) - record: cluster_job:cortex_query_frontend_queue_duration_seconds_sum:sum_rate - - expr: 
sum(rate(cortex_query_frontend_queue_duration_seconds_count[1m])) by (cluster, - job) - record: cluster_job:cortex_query_frontend_queue_duration_seconds_count:sum_rate - - name: mimir_ingester_queries - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_series_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_series:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_series_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_series:50quantile - - expr: sum(rate(cortex_ingester_queried_series_sum[1m])) by (cluster, job) / - sum(rate(cortex_ingester_queried_series_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_series:avg - - expr: sum(rate(cortex_ingester_queried_series_bucket[1m])) by (le, cluster, - job) - record: cluster_job:cortex_ingester_queried_series_bucket:sum_rate - - expr: sum(rate(cortex_ingester_queried_series_sum[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_series_sum:sum_rate - - expr: sum(rate(cortex_ingester_queried_series_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_series_count:sum_rate - - expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_samples_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_samples:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_samples_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_samples:50quantile - - expr: sum(rate(cortex_ingester_queried_samples_sum[1m])) by (cluster, job) / - sum(rate(cortex_ingester_queried_samples_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_samples:avg - - expr: sum(rate(cortex_ingester_queried_samples_bucket[1m])) by (le, cluster, - job) - record: cluster_job:cortex_ingester_queried_samples_bucket:sum_rate - - expr: 
sum(rate(cortex_ingester_queried_samples_sum[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_samples_sum:sum_rate - - expr: sum(rate(cortex_ingester_queried_samples_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_samples_count:sum_rate - - expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_exemplars_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_exemplars:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_exemplars_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_exemplars:50quantile - - expr: sum(rate(cortex_ingester_queried_exemplars_sum[1m])) by (cluster, job) - / sum(rate(cortex_ingester_queried_exemplars_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_exemplars:avg - - expr: sum(rate(cortex_ingester_queried_exemplars_bucket[1m])) by (le, cluster, - job) - record: cluster_job:cortex_ingester_queried_exemplars_bucket:sum_rate - - expr: sum(rate(cortex_ingester_queried_exemplars_sum[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_exemplars_sum:sum_rate - - expr: sum(rate(cortex_ingester_queried_exemplars_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_exemplars_count:sum_rate - - name: mimir_received_samples - rules: - - expr: | - sum by (cluster, namespace, job) (rate(cortex_distributor_received_samples_total[5m])) - record: cluster_namespace_job:cortex_distributor_received_samples:rate5m - - name: mimir_exemplars_in - rules: - - expr: | - sum by (cluster, namespace, job) (rate(cortex_distributor_exemplars_in_total[5m])) - record: cluster_namespace_job:cortex_distributor_exemplars_in:rate5m - - name: mimir_received_exemplars - rules: - - expr: | - sum by (cluster, namespace, job) (rate(cortex_distributor_received_exemplars_total[5m])) - record: cluster_namespace_job:cortex_distributor_received_exemplars:rate5m - - name: 
mimir_exemplars_ingested - rules: - - expr: | - sum by (cluster, namespace, job) (rate(cortex_ingester_ingested_exemplars_total[5m])) - record: cluster_namespace_job:cortex_ingester_ingested_exemplars:rate5m - - name: mimir_exemplars_appended - rules: - - expr: | - sum by (cluster, namespace, job) (rate(cortex_ingester_tsdb_exemplar_exemplars_appended_total[5m])) - record: cluster_namespace_job:cortex_ingester_tsdb_exemplar_exemplars_appended:rate5m - - name: mimir_scaling_rules - rules: - - expr: | - # Convenience rule to get the number of replicas for both a deployment and a statefulset. - # Multi-zone deployments are grouped together removing the "zone-X" suffix. - sum by (cluster, namespace, deployment) ( - label_replace( - kube_deployment_spec_replicas, - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - or - sum by (cluster, namespace, deployment) ( - label_replace(kube_statefulset_replicas, "deployment", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?") - ) - record: cluster_namespace_deployment:actual_replicas:count - - expr: | - ceil( - quantile_over_time(0.99, - sum by (cluster, namespace) ( - cluster_namespace_job:cortex_distributor_received_samples:rate5m - )[24h:] - ) - / 240000 - ) - labels: - deployment: distributor - reason: sample_rate - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - sum by (cluster, namespace) (cortex_limits_overrides{limit_name="ingestion_rate"}) - * 0.59999999999999998 / 240000 - ) - labels: - deployment: distributor - reason: sample_rate_limits - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - quantile_over_time(0.99, - sum by (cluster, namespace) ( - cluster_namespace_job:cortex_distributor_received_samples:rate5m - )[24h:] - ) - * 3 / 80000 - ) - labels: - deployment: ingester - 
reason: sample_rate - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - quantile_over_time(0.99, - sum by(cluster, namespace) ( - cortex_ingester_memory_series - )[24h:] - ) - / 1500000 - ) - labels: - deployment: ingester - reason: active_series - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - sum by (cluster, namespace) (cortex_limits_overrides{limit_name="max_global_series_per_user"}) - * 3 * 0.59999999999999998 / 1500000 - ) - labels: - deployment: ingester - reason: active_series_limits - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - sum by (cluster, namespace) (cortex_limits_overrides{limit_name="ingestion_rate"}) - * 0.59999999999999998 / 80000 - ) - labels: - deployment: ingester - reason: sample_rate_limits - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - (sum by (cluster, namespace) ( - cortex_ingester_tsdb_storage_blocks_bytes{job=~".+/ingester.*"} - ) / 4) - / - avg by (cluster, namespace) ( - memcached_limit_bytes{job=~".+/memcached"} - ) - ) - labels: - deployment: memcached - reason: active_series - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - sum by (cluster, namespace, pod)(rate(container_cpu_usage_seconds_total[1m])), - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - record: cluster_namespace_deployment:container_cpu_usage_seconds_total:sum_rate - - expr: | - # Convenience rule to get the CPU request for both a deployment and a statefulset. - # Multi-zone deployments are grouped together removing the "zone-X" suffix. 
- # This recording rule is made compatible with the breaking changes introduced in kube-state-metrics v2 - # that remove resource metrics, ref: - # - https://github.com/kubernetes/kube-state-metrics/blob/master/CHANGELOG.md#v200-alpha--2020-09-16 - # - https://github.com/kubernetes/kube-state-metrics/pull/1004 - # - # This is the old expression, compatible with kube-state-metrics < v2.0.0, - # where kube_pod_container_resource_requests_cpu_cores was removed: - ( - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - kube_pod_container_resource_requests_cpu_cores, - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - ) - or - # This expression is compatible with kube-state-metrics >= v1.4.0, - # where kube_pod_container_resource_requests was introduced. - ( - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - kube_pod_container_resource_requests{resource="cpu"}, - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - ) - record: cluster_namespace_deployment:kube_pod_container_resource_requests_cpu_cores:sum - - expr: | - # Jobs should be sized to their CPU usage. - # We do this by comparing 99th percentile usage over the last 24hrs to - # their current provisioned #replicas and resource requests. 
- ceil( - cluster_namespace_deployment:actual_replicas:count - * - quantile_over_time(0.99, cluster_namespace_deployment:container_cpu_usage_seconds_total:sum_rate[24h]) - / - cluster_namespace_deployment:kube_pod_container_resource_requests_cpu_cores:sum - ) - labels: - reason: cpu_usage - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - # Convenience rule to get the Memory utilization for both a deployment and a statefulset. - # Multi-zone deployments are grouped together removing the "zone-X" suffix. - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - container_memory_usage_bytes{image!=""}, - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - record: cluster_namespace_deployment:container_memory_usage_bytes:sum - - expr: | - # Convenience rule to get the Memory request for both a deployment and a statefulset. - # Multi-zone deployments are grouped together removing the "zone-X" suffix. 
- # This recording rule is made compatible with the breaking changes introduced in kube-state-metrics v2 - # that remove resource metrics, ref: - # - https://github.com/kubernetes/kube-state-metrics/blob/master/CHANGELOG.md#v200-alpha--2020-09-16 - # - https://github.com/kubernetes/kube-state-metrics/pull/1004 - # - # This is the old expression, compatible with kube-state-metrics < v2.0.0, - # where kube_pod_container_resource_requests_memory_bytes was removed: - ( - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - kube_pod_container_resource_requests_memory_bytes, - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - ) - or - # This expression is compatible with kube-state-metrics >= v1.4.0, - # where kube_pod_container_resource_requests was introduced. - ( - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - kube_pod_container_resource_requests{resource="memory"}, - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - ) - record: cluster_namespace_deployment:kube_pod_container_resource_requests_memory_bytes:sum - - expr: | - # Jobs should be sized to their Memory usage. - # We do this by comparing 99th percentile usage over the last 24hrs to - # their current provisioned #replicas and resource requests. 
- ceil( - cluster_namespace_deployment:actual_replicas:count - * - quantile_over_time(0.99, cluster_namespace_deployment:container_memory_usage_bytes:sum[24h]) - / - cluster_namespace_deployment:kube_pod_container_resource_requests_memory_bytes:sum - ) - labels: - reason: memory_usage - record: cluster_namespace_deployment_reason:required_replicas:count - - name: mimir_alertmanager_rules - rules: - - expr: | - sum by (cluster, job, pod) (cortex_alertmanager_alerts) - record: cluster_job_pod:cortex_alertmanager_alerts:sum - - expr: | - sum by (cluster, job, pod) (cortex_alertmanager_silences) - record: cluster_job_pod:cortex_alertmanager_silences:sum - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_alerts_received_total[5m])) - record: cluster_job:cortex_alertmanager_alerts_received_total:rate5m - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_alerts_invalid_total[5m])) - record: cluster_job:cortex_alertmanager_alerts_invalid_total:rate5m - - expr: | - sum by (cluster, job, integration) (rate(cortex_alertmanager_notifications_total[5m])) - record: cluster_job_integration:cortex_alertmanager_notifications_total:rate5m - - expr: | - sum by (cluster, job, integration) (rate(cortex_alertmanager_notifications_failed_total[5m])) - record: cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_state_replication_total[5m])) - record: cluster_job:cortex_alertmanager_state_replication_total:rate5m - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_state_replication_failed_total[5m])) - record: cluster_job:cortex_alertmanager_state_replication_failed_total:rate5m - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_partial_state_merges_total[5m])) - record: cluster_job:cortex_alertmanager_partial_state_merges_total:rate5m - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_partial_state_merges_failed_total[5m])) - record: 
cluster_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m - - name: mimir_ingester_rules - rules: - - expr: | - sum by(cluster, namespace, pod) (rate(cortex_ingester_ingested_samples_total[1m])) - record: cluster_namespace_pod:cortex_ingester_ingested_samples_total:rate1m + endpoints: + - honorLabels: true + port: http-metrics + scheme: http + selector: + matchLabels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent --- apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor @@ -45296,3 +1223,37 @@ spec: app.kubernetes.io/component: mimir app.kubernetes.io/instance: mimir-monolithic-mode app.kubernetes.io/name: mimir +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +spec: + rules: + - host: grafana-agent.localhost + http: + paths: + - backend: + service: + name: grafana-agent + port: + number: 80 + path: / + pathType: Prefix + - host: agent.localhost + http: + paths: + - backend: + service: + name: grafana-agent + port: + number: 80 + path: / + pathType: Prefix diff --git a/kubernetes/monolithic-mode/metrics/kustomization.yaml b/kubernetes/monolithic-mode/metrics/kustomization.yaml index 9f1dfd97..b1f798e4 100644 --- a/kubernetes/monolithic-mode/metrics/kustomization.yaml +++ b/kubernetes/monolithic-mode/metrics/kustomization.yaml @@ -8,23 +8,12 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: +- ../../common/grafana-agent - mimir -- ../../../monitoring-mixins/agent-flow-mixin/deploy -- ../../../monitoring-mixins/go-runtime-mixin/deploy -- ../../../monitoring-mixins/mimir-mixin/deploy configMapGenerator: - name: agent-config namespace: monitoring-system - options: - disableNameSuffixHash: true + behavior: replace 
files: - configs/config.river - -- name: grafana-datasources - namespace: monitoring-system - options: - labels: - grafana_datasource: "1" - files: - - datasources.yaml=configs/grafana-datasources-mimir.yaml diff --git a/kubernetes/monolithic-mode/profiles/configs/config.river b/kubernetes/monolithic-mode/profiles/configs/config.river index 3c031bf3..0e2632e2 100644 --- a/kubernetes/monolithic-mode/profiles/configs/config.river +++ b/kubernetes/monolithic-mode/profiles/configs/config.river @@ -8,6 +8,9 @@ logging { format = "logfmt" } +/******************************************** + * Grafana LGTMP Stack Receiver Provider + ********************************************/ module.file "lgtmp" { filename = coalesce(env("AGENT_CONFIG_FOLDER"), "/etc/agent-modules") + "/lgtmp.river" @@ -31,3 +34,28 @@ module.file "profiles_primary" { clustering = true } } + +/******************************************** + * Metrics + ********************************************/ +module.file "metrics_primary" { + filename = coalesce(env("AGENT_CONFIG_FOLDER"), "/etc/agent-modules") + "/metrics.river" + + arguments { + forward_to = [module.file.lgtmp.exports.metrics_receiver] + clustering = true + } +} + +/******************************************** + * Agent Integrations + ********************************************/ +module.file "agent_integrations" { + filename = coalesce(env("AGENT_CONFIG_FOLDER"), "/etc/agent-modules") + "/integrations.river" + + arguments { + name = "agent-integrations" + namespace = "monitoring-system" + forward_to = [module.file.lgtmp.exports.metrics_receiver] + } +} diff --git a/kubernetes/monolithic-mode/profiles/configs/grafana-datasources-pyroscope.yaml b/kubernetes/monolithic-mode/profiles/configs/grafana-datasources-pyroscope.yaml deleted file mode 100644 index 0ff4f5e1..00000000 --- a/kubernetes/monolithic-mode/profiles/configs/grafana-datasources-pyroscope.yaml +++ /dev/null @@ -1,17 +0,0 @@ -apiVersion: 1 - -deleteDatasources: -- name: Profiles - uid: 
profiles - -datasources: -# Pyroscope for profiles -- name: Profiles - type: grafana-pyroscope-datasource - uid: profiles - access: proxy - url: http://nginx.gateway.svc.cluster.local:4040 - basicAuth: false - isDefault: true - version: 1 - editable: true diff --git a/kubernetes/monolithic-mode/profiles/k8s-all-in-one.yaml b/kubernetes/monolithic-mode/profiles/k8s-all-in-one.yaml index f3692ef2..c6f28e0f 100644 --- a/kubernetes/monolithic-mode/profiles/k8s-all-in-one.yaml +++ b/kubernetes/monolithic-mode/profiles/k8s-all-in-one.yaml @@ -5,6 +5,30 @@ metadata: --- apiVersion: v1 kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/version: 2.11.0 + name: mimir + namespace: monitoring-system +--- +apiVersion: v1 +kind: ServiceAccount metadata: labels: app.kubernetes.io/instance: pyroscope @@ -42,6 +66,108 @@ rules: - get --- apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent +rules: +- apiGroups: + - "" + - discovery.k8s.io + - networking.k8s.io + resources: + - endpoints + - endpointslices + - ingresses + - nodes + - nodes/proxy + - nodes/metrics + - pods + - services + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - pods + - pods/log + - namespaces + verbs: + - get + - list + - watch +- apiGroups: + - 
monitoring.grafana.com + resources: + - podlogs + verbs: + - get + - list + - watch +- apiGroups: + - monitoring.coreos.com + resources: + - prometheusrules + verbs: + - get + - list + - watch +- nonResourceURLs: + - /metrics + verbs: + - get +- apiGroups: + - monitoring.coreos.com + resources: + - podmonitors + - servicemonitors + - probes + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - events + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - configmaps + - secrets + verbs: + - get + - list + - watch +- apiGroups: + - apps + resources: + - replicasets + verbs: + - get + - list + - watch +- apiGroups: + - extensions + resources: + - replicasets + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: labels: @@ -61,14 +187,35 @@ subjects: name: pyroscope namespace: profiles-system --- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: grafana-agent +subjects: +- kind: ServiceAccount + name: grafana-agent + namespace: monitoring-system +--- apiVersion: v1 data: config.river: "/*\nThe following example shows using the default all logs processing module, for\na single tenant and specifying the destination url/credentials via environment\nvariables.\n*/\nlogging {\n\tlevel = coalesce(env(\"AGENT_LOG_LEVEL\"), - \"info\")\n\tformat = \"logfmt\"\n}\n\nmodule.file \"lgtmp\" {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), - \"/etc/agent-modules\") + \"/lgtmp.river\"\n\n\targuments {\n\t\tcluster = - coalesce(env(\"CLUSTER\"), \"k3d-k3s-codelab\")\n\t\tlogs_endpoint = coalesce(env(\"LOGS_ENDPOINT\"), + \"info\")\n\tformat = 
\"logfmt\"\n}\n\n/********************************************\n + * Grafana LGTMP Stack Receiver Provider\n ********************************************/\nmodule.file + \"lgtmp\" {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), \"/etc/agent-modules\") + + \"/lgtmp.river\"\n\n\targuments {\n\t\tcluster = coalesce(env(\"CLUSTER\"), + \"k3d-k3s-codelab\")\n\t\tlogs_endpoint = coalesce(env(\"LOGS_ENDPOINT\"), \"http://nginx.gateway.svc:3100\")\n\t\tmetrics_endpoint = coalesce(env(\"METRICS_ENDPOINT\"), \"http://nginx.gateway.svc:8080\")\n\t\tprofiles_endpoint = coalesce(env(\"PROFILES_ENDPOINT\"), \"http://nginx.gateway.svc:4040\")\n\t\ttraces_endpoint = coalesce(env(\"TRACES_ENDPOINT\"), @@ -76,37 +223,637 @@ data: * Profiles\n ********************************************/\nmodule.file \"profiles_primary\" {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), \"/etc/agent-modules\") + \"/profiles.river\"\n\n\targuments {\n\t\tforward_to = [module.file.lgtmp.exports.profiles_receiver]\n\t\tclustering - = true\n\t}\n}\n" + = true\n\t}\n}\n\n/********************************************\n * Metrics\n + ********************************************/\nmodule.file \"metrics_primary\" + {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), \"/etc/agent-modules\") + + \"/metrics.river\"\n\n\targuments {\n\t\tforward_to = [module.file.lgtmp.exports.metrics_receiver]\n\t\tclustering + = true\n\t}\n}\n\n/********************************************\n * Agent Integrations\n + ********************************************/\nmodule.file \"agent_integrations\" + {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), \"/etc/agent-modules\") + + \"/integrations.river\"\n\n\targuments {\n\t\tname = \"agent-integrations\"\n\t\tnamespace + \ = \"monitoring-system\"\n\t\tforward_to = [module.file.lgtmp.exports.metrics_receiver]\n\t}\n}\n" +kind: ConfigMap +metadata: + name: agent-config-52gfhcfbb4 + namespace: monitoring-system +--- +apiVersion: v1 +data: + MEMCACHED_SECRET_NAME: 
integrations-memcached + MYSQL_SECRET_NAME: integrations-mysql + REDIS_SECRET_NAME: integrations-redis + memcached.river: "/*\nModule: Memcached integrations\nDescription: Wrapper module + to integration Memcached metrics\n*/\nargument \"clustering\" {\n\t// comment + = \"Whether or not clustering should be enabled\"\n\toptional = true\n\tdefault + \ = true\n}\n\nargument \"name\" {\n\t// comment = \"Name of the secret for Memcached\"\n\toptional + = true\n\tdefault = \"integrations-memcached\"\n}\n\nargument \"namespace\" {\n\t// + comment = \"Namespace of the Memcached secret Integrations\"\n\toptional = true\n\tdefault + \ = \"default\"\n}\n\nargument \"instance\" {\n\t// comment = \"Instance of the + Memcached\"\n\toptional = true\n\tdefault = \"primary\"\n}\n\nargument \"forward_to\" + { }\n\nremote.kubernetes.secret \"memcached\" {\n\tname = argument.name.value\n\tnamespace + = argument.namespace.value\n}\n\n// Metrics\nprometheus.exporter.memcached \"integrations_memcached\" + {\n\taddress = nonsensitive(remote.kubernetes.secret.memcached.data[\"memcached-address\"])\n\ttimeout + = \"5s\"\n}\n\nprometheus.scrape \"memcached\" {\n\tclustering {\n\t\tenabled + = argument.clustering.value\n\t}\n\n\tenable_protobuf_negotiation = true\n\tscrape_classic_histograms + \ = true\n\n\ttargets = concat(\n\t\tprometheus.exporter.memcached.integrations_memcached.targets,\n\t)\n\tjob_name + \ = \"integrations/memcached\"\n\tforward_to = [prometheus.relabel.integrations_memcached.receiver]\n}\n\nprometheus.relabel + \"integrations_memcached\" {\n\trule {\n\t\treplacement = argument.instance.value\n\t\ttarget_label + = \"instance\"\n\t}\n\tforward_to = argument.forward_to.value\n}\n" + mysql.river: "/*\nModule: Mysql integrations\nDescription: Wrapper module to integration + mysql metrics\n*/\nargument \"clustering\" {\n\t// comment = \"Whether or not + clustering should be enabled\"\n\toptional = true\n\tdefault = true\n}\n\nargument + \"name\" {\n\t// comment = \"Name of 
the secret for MySQL\"\n\toptional = true\n\tdefault + \ = \"integrations-mysql\"\n}\n\nargument \"namespace\" {\n\t// comment = \"Namespace + of the MySQL secret Integrations\"\n\toptional = true\n\tdefault = \"default\"\n}\n\nargument + \"instance\" {\n\t// comment = \"Instance of the Database\"\n\toptional = true\n\tdefault + \ = \"primary\"\n}\n\nargument \"forward_to\" { }\n\nremote.kubernetes.secret + \"mysql\" {\n\tname = argument.name.value\n\tnamespace = argument.namespace.value\n}\n\n// + Metrics\nprometheus.exporter.mysql \"integrations_mysql\" {\n\tdata_source_name + = nonsensitive(remote.kubernetes.secret.mysql.data[\"mysql-username\"]) + \":\" + + nonsensitive(remote.kubernetes.secret.mysql.data[\"mysql-password\"]) + \"@(\" + + nonsensitive(remote.kubernetes.secret.mysql.data[\"mysql-host\"]) + \")/\"\n}\n\nprometheus.scrape + \"mysql\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\tenable_protobuf_negotiation + = true\n\tscrape_classic_histograms = true\n\n\ttargets = concat(\n\t\tprometheus.exporter.mysql.integrations_mysql.targets,\n\t)\n\tjob_name + \ = \"integrations/mysql\"\n\tforward_to = [prometheus.relabel.integrations_mysql.receiver]\n}\n\nprometheus.relabel + \"integrations_mysql\" {\n\trule {\n\t\treplacement = argument.instance.value\n\t\ttarget_label + = \"instance\"\n\t}\n\tforward_to = argument.forward_to.value\n}\n" + redis.river: "/*\nModule: Redis integrations\nDescription: Wrapper module to integration + Redis metrics\n*/\nargument \"clustering\" {\n\t// comment = \"Whether or not + clustering should be enabled\"\n\toptional = true\n\tdefault = true\n}\n\nargument + \"namespace\" {\n\t// comment = \"Namespace of the Redis secret Integrations\"\n\toptional + = true\n\tdefault = \"default\"\n}\n\nargument \"name\" {\n\t// comment = \"Name + of the secret for Redis\"\n\toptional = true\n\tdefault = \"integrations-redis\"\n}\n\nargument + \"instance\" {\n\t// comment = \"Instance of the Redis\"\n\toptional = 
true\n\tdefault + \ = \"master\"\n}\n\nargument \"forward_to\" { }\n\nremote.kubernetes.secret \"redis\" + {\n\tname = argument.name.value\n\tnamespace = argument.namespace.value\n}\n\n// + Metrics\nprometheus.exporter.redis \"integrations_redis\" {\n\tredis_addr = + nonsensitive(remote.kubernetes.secret.redis.data[\"redis-addr\"])\n\tredis_password + = nonsensitive(remote.kubernetes.secret.redis.data[\"redis-password\"])\n}\n\nprometheus.scrape + \"redis\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\tenable_protobuf_negotiation + = true\n\tscrape_classic_histograms = true\n\n\ttargets = concat(\n\t\tprometheus.exporter.redis.integrations_redis.targets,\n\t)\n\tjob_name + \ = \"integrations/redis\"\n\tforward_to = [prometheus.relabel.integrations_redis.receiver]\n}\n\nprometheus.relabel + \"integrations_redis\" {\n\trule {\n\t\treplacement = argument.instance.value\n\t\ttarget_label + = \"instance\"\n\t}\n\tforward_to = argument.forward_to.value\n}\n" +kind: ConfigMap +metadata: + name: agent-integrations + namespace: monitoring-system +--- +apiVersion: v1 +data: + integrations.river: "/*\nModule: Agent integrations\nDescription: Wrapper module + to include auto loading integrations\n*/\nargument \"name\" {\n\t// comment = + \"Name of the integrations config\"\n\toptional = true\n\tdefault = \"agent-integrations\"\n}\n\nargument + \"namespace\" {\n\t// comment = \"Namespace of the integrations config\"\n\toptional + = true\n\tdefault = \"default\"\n}\n\nargument \"forward_to\" { }\n\nremote.kubernetes.configmap + \"integrations\" {\n\tname = argument.name.value\n\tnamespace = argument.namespace.value\n}\n\n/********************************************\n + * Integrations Mysql\n ********************************************/\nmodule.string + \"mysql\" {\n\tcontent = remote.kubernetes.configmap.integrations.data[\"mysql.river\"]\n\n\targuments + {\n\t\tnamespace = argument.namespace.value\n\t\tname = 
remote.kubernetes.configmap.integrations.data[\"MYSQL_SECRET_NAME\"]\n\t\tinstance + \ = \"primary\"\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n\n/********************************************\n + * Integrations Memcached\n ********************************************/\nmodule.string + \"memcached\" {\n\tcontent = remote.kubernetes.configmap.integrations.data[\"memcached.river\"]\n\n\targuments + {\n\t\tnamespace = argument.namespace.value\n\t\tname = remote.kubernetes.configmap.integrations.data[\"MEMCACHED_SECRET_NAME\"]\n\t\tinstance + \ = \"primary\"\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n\n/********************************************\n + * Integrations Redis\n ********************************************/\nmodule.string + \"redis\" {\n\tcontent = remote.kubernetes.configmap.integrations.data[\"redis.river\"]\n\n\targuments + {\n\t\tnamespace = argument.namespace.value\n\t\tname = remote.kubernetes.configmap.integrations.data[\"REDIS_SECRET_NAME\"]\n\t\tinstance + \ = \"master\"\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n" + lgtmp.river: "/********************************************\n * ARGUMENTS\n ********************************************/\nargument + \"cluster\" {\n\toptional = true\n\tdefault = \"k3d-k3s-codelab\"\n}\n\nargument + \"tenant\" {\n\toptional = true\n\tdefault = \"anonymous\"\n}\n\nargument \"metrics_endpoint\" + {\n\toptional = true\n\tdefault = \"http://mimir:8080\"\n\t//comment = \"Where + to send collected metrics.\"\n}\n\nargument \"logs_endpoint\" {\n\toptional = + true\n\tdefault = \"http://loki:3100\"\n\t//comment = \"Where to send collected + logs.\"\n}\n\nargument \"traces_endpoint\" {\n\toptional = true\n\tdefault = + \"tempo:4317\"\n\t//comment = \"Where to send collected traces.\"\n}\n\nargument + \"profiles_endpoint\" {\n\toptional = true\n\tdefault = \"http://pyroscope:4040\"\n\t//comment + \ = \"Where to send collected profiles.\"\n}\n\n/********************************************\n + * 
EXPORTS\n ********************************************/\n\nexport \"metrics_receiver\" + {\n\tvalue = prometheus.remote_write.mimir.receiver\n}\n\nexport \"logs_receiver\" + {\n\tvalue = loki.write.loki.receiver\n}\n\nexport \"traces_receiver\" {\n\tvalue + = otelcol.exporter.otlp.tempo.input\n}\n\nexport \"profiles_receiver\" {\n\tvalue + = pyroscope.write.pyroscope.receiver\n}\n\n/********************************************\n + * Endpoints\n ********************************************/\n\n// Metrics\nprometheus.remote_write + \"mimir\" {\n\tendpoint {\n\t\turl = argument.metrics_endpoint.value + + \"/api/v1/push\"\n\t\tsend_native_histograms = true\n\t}\n\n\texternal_labels + = {\n\t\t\"scraped_by\" = \"grafana-agent\",\n\t\t\"cluster\" = argument.cluster.value,\n\t}\n}\n\n// + Logs\nloki.write \"loki\" {\n\tendpoint {\n\t\turl = argument.logs_endpoint.value + + \"/loki/api/v1/push\"\n\t\ttenant_id = argument.tenant.value\n\t}\n\n\texternal_labels + = {\n\t\t\"scraped_by\" = \"grafana-agent\",\n\t\t\"cluster\" = argument.cluster.value,\n\t}\n}\n\n// + Traces\notelcol.exporter.otlp \"tempo\" {\n\tclient {\n\t\tendpoint = argument.traces_endpoint.value\n\n\t\ttls + {\n\t\t\tinsecure = true\n\t\t\tinsecure_skip_verify = true\n\t\t}\n\t}\n}\n\n// + Profiles\npyroscope.write \"pyroscope\" {\n\tendpoint {\n\t\turl = argument.profiles_endpoint.value\n\t}\n\n\texternal_labels + = {\n\t\t\"scraped_by\" = \"grafana-agent\",\n\t\t\"cluster\" = argument.cluster.value,\n\t}\n}\n" + logs.river: "/*\nModule: logs\nDescription: Wrapper module to include all kubernetes + logging modules and use cri parsing\n*/\nargument \"forward_to\" {\n\t// comment + = \"Must be a list(LogsReceiver) where collected logs should be forwarded to\"\n\toptional + = false\n}\n\nargument \"tenant\" {\n\t// comment = \"The tenant to filter logs + to. 
This does not have to be the tenantId, this is the value to look for in the + logs.agent.grafana.com/tenant annotation, and this can be a regex.\"\n\toptional + = true\n\tdefault = \".*\"\n}\n\nargument \"keep_labels\" {\n\t// comment = \"List + of labels to keep before the log message is written to Loki\"\n\toptional = true\n\tdefault + \ = [\n\t\t\"app\",\n\t\t\"cluster\",\n\t\t\"component\",\n\t\t\"container\",\n\t\t\"deployment\",\n\t\t\"env\",\n\t\t\"filename\",\n\t\t\"instance\",\n\t\t\"job\",\n\t\t\"level\",\n\t\t\"log_type\",\n\t\t\"namespace\",\n\t\t\"region\",\n\t\t\"service\",\n\t\t\"squad\",\n\t\t\"team\",\n\t]\n}\n\nargument + \"git_repo\" {\n\toptional = true\n\tdefault = coalesce(env(\"GIT_REPO\"), \"https://github.com/grafana/agent-modules.git\")\n}\n\nargument + \"git_rev\" {\n\toptional = true\n\tdefault = coalesce(env(\"GIT_REV\"), env(\"GIT_REVISION\"), + env(\"GIT_BRANCH\"), \"main\")\n}\n\nargument \"git_pull_freq\" {\n\t// comment + = \"How often to pull the git repo, the default is 0s which means never pull\"\n\toptional + = true\n\tdefault = \"0s\"\n}\n\nmodule.git \"log_targets\" {\n\trepository = + argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/targets/logs-from-worker.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.log_formats_all.exports.process.receiver]\n\t\ttenant + \ = argument.tenant.value\n\t\tgit_repo = argument.git_repo.value\n\t\tgit_rev + \ = argument.git_rev.value\n\t\tgit_pull_freq = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git + \"log_formats_all\" {\n\trepository = argument.git_repo.value\n\trevision + \ = argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/log-formats/all.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.log_level_default.exports.process.receiver]\n\t\tgit_repo + \ = argument.git_repo.value\n\t\tgit_rev = 
argument.git_rev.value\n\t\tgit_pull_freq + = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git \"log_level_default\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/labels/log-level.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.label_normalize_filename.exports.process.receiver]\n\t}\n}\n\nmodule.git + \"label_normalize_filename\" {\n\trepository = argument.git_repo.value\n\trevision + \ = argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/labels/normalize-filename.river\"\n\n\targuments + {\n\t\t// here we fork, one branch goes to the log level module, the other goes + to the metrics module\n\t\t// this is because we need to reduce the labels on + the pre-metrics but they are still necessary in\n\t\t// downstream modules\n\t\tforward_to + = [\n\t\t\tmodule.git.pre_process_metrics.exports.process.receiver,\n\t\t\tmodule.git.drop_levels.exports.process.receiver,\n\t\t]\n\t}\n}\n\nmodule.git + \"pre_process_metrics\" {\n\trepository = argument.git_repo.value\n\trevision + \ = argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/metrics/pre-process-bytes-lines.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.drop_levels.exports.process.receiver]\n\t\tkeep_labels + = argument.keep_labels.value\n\t}\n}\n\nmodule.git \"drop_levels\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/drops/levels.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.scrub_all.exports.process.receiver]\n\t\tgit_repo + \ = argument.git_repo.value\n\t\tgit_rev = argument.git_rev.value\n\t\tgit_pull_freq + = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git \"scrub_all\" {\n\trepository + \ = 
argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/scrubs/all.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.embed_pod.exports.process.receiver]\n\t\tgit_repo + \ = argument.git_repo.value\n\t\tgit_rev = argument.git_rev.value\n\t\tgit_pull_freq + = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git \"embed_pod\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/embed/pod.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.mask_all.exports.process.receiver]\n\t}\n}\n\nmodule.git + \"mask_all\" {\n\trepository = argument.git_repo.value\n\trevision = + argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/masks/all.river\"\n\n\targuments {\n\t\tforward_to + \ = [module.git.label_keep.exports.process.receiver]\n\t\tgit_repo = argument.git_repo.value\n\t\tgit_rev + \ = argument.git_rev.value\n\t\tgit_pull_freq = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git + \"label_keep\" {\n\trepository = argument.git_repo.value\n\trevision = + argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/labels/keep-labels.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.post_process_metrics.exports.process.receiver]\n\t\tkeep_labels + = argument.keep_labels.value\n\t}\n}\n\nmodule.git \"post_process_metrics\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/metrics/post-process-bytes-lines.river\"\n\n\targuments + {\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n" + metrics.river: "/*\nModule: metrics-all\nDescription: Wrapper module to include + all kubernetes metric modules and use cri parsing\n*/\nargument 
\"forward_to\" + {\n\t// comment = \"Must be a list(MetricssReceiver) where collected logs should + be forwarded to\"\n\toptional = false\n}\n\nargument \"clustering\" {\n\t// comment + = \"Whether or not clustering should be enabled\"\n\toptional = true\n\tdefault + \ = false\n}\n\n/********************************************\n * Kubernetes Auto + Scrape ServiceMonitor\n ********************************************/\nprometheus.operator.servicemonitors + \"auto_scrape_servicemonitors\" {\n\tforward_to = argument.forward_to.value\n\n\tclustering + {\n\t\tenabled = argument.clustering.value\n\t}\n}\n\n/********************************************\n + * Kubernetes Auto Scrape PodMonitors\n ********************************************/\nprometheus.operator.podmonitors + \"auto_scrape_podmonitors\" {\n\tforward_to = argument.forward_to.value\n\n\tclustering + {\n\t\tenabled = argument.clustering.value\n\t}\n\n\tselector {\n\t\tmatch_expression + {\n\t\t\tkey = \"team\"\n\t\t\toperator = \"In\"\n\t\t\tvalues = [\"team-infra\"]\n\t\t}\n\t}\n}\n\n/********************************************\n + * Kubernetes Prometheus Rules To Mimir\n ********************************************/\nmimir.rules.kubernetes + \"prometheus_rules_to_mimir\" {\n\taddress = coalesce(env(\"METRICS_ENDPOINT\"), + \"http://nginx.gateway.svc:8080\")\n\ttenant_id = \"anonymous\"\n}\n" + profiles.river: "/*\nModule: profiles\nDescription: Wrapper module to include all + kubernetes profile modules and use cri parsing\n*/\nargument \"forward_to\" {\n\t// + comment = \"Must be a list(ProfilessReceiver) where collected logs should be forwarded + to\"\n\toptional = false\n}\n\nargument \"clustering\" {\n\t// comment = \"Whether + or not clustering should be enabled\"\n\toptional = true\n\tdefault = false\n}\n\ndiscovery.kubernetes + \"pyroscope_kubernetes\" {\n\trole = \"pod\"\n}\n\n// The default scrape config + allows to define annotations based scraping.\n//\n// For example the following + 
annotations:\n//\n// ```\n// profiles.grafana.com/memory.scrape: \"true\"\n// + profiles.grafana.com/memory.port: \"8080\"\n// profiles.grafana.com/cpu.scrape: + \"true\"\n// profiles.grafana.com/cpu.port: \"8080\"\n// profiles.grafana.com/goroutine.scrape: + \"true\"\n// profiles.grafana.com/goroutine.port: \"8080\"\n// ```\n//\n// will + scrape the `memory`, `cpu` and `goroutine` profiles from the `8080` port of the + pod.\n//\n// For more information see https://grafana.com/docs/phlare/latest/operators-guide/deploy-kubernetes/#optional-scrape-your-own-workloads-profiles\ndiscovery.relabel + \"kubernetes_pods\" {\n\ttargets = concat(discovery.kubernetes.pyroscope_kubernetes.targets)\n\n\trule + {\n\t\taction = \"drop\"\n\t\tsource_labels = [\"__meta_kubernetes_pod_phase\"]\n\t\tregex + \ = \"Pending|Succeeded|Failed|Completed\"\n\t}\n\n\trule {\n\t\taction + = \"labelmap\"\n\t\tregex = \"__meta_kubernetes_pod_label_(.+)\"\n\t}\n\n\trule + {\n\t\taction = \"replace\"\n\t\tsource_labels = [\"__meta_kubernetes_namespace\"]\n\t\ttarget_label + \ = \"namespace\"\n\t}\n\n\trule {\n\t\taction = \"replace\"\n\t\tsource_labels + = [\"__meta_kubernetes_pod_name\"]\n\t\ttarget_label = \"pod\"\n\t}\n\n\trule + {\n\t\taction = \"replace\"\n\t\tsource_labels = [\"__meta_kubernetes_pod_container_name\"]\n\t\ttarget_label + \ = \"container\"\n\t}\n}\n\ndiscovery.relabel \"kubernetes_pods_memory_default_name\" + {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = 
\"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_memory_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = 
\"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Memory\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_memory\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_memory_default_name.output, discovery.relabel.kubernetes_pods_memory_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = true\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_cpu_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = 
\"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_cpu_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape CPU\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_cpu\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_cpu_default_name.output, discovery.relabel.kubernetes_pods_cpu_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.goroutine + 
{\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_goroutine_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_goroutine_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = 
\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Goroutine\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_goroutine\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_goroutine_default_name.output, + discovery.relabel.kubernetes_pods_goroutine_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_block_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule 
{\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_block_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + 
\ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Block\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_block\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_block_default_name.output, discovery.relabel.kubernetes_pods_block_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_mutex_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = 
\"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_mutex_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Mutex\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_mutex\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = 
concat(discovery.relabel.kubernetes_pods_mutex_default_name.output, discovery.relabel.kubernetes_pods_mutex_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_fgprof_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_fgprof_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scrape\"]\n\t\taction + \ = 
\"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Fgprof\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_fgprof\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_fgprof_default_name.output, discovery.relabel.kubernetes_pods_fgprof_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = true\n\t\t}\n\t}\n}\n" + traces.river: "/*\nModule: 
traces\n*/\n\n/********************************************\n + * ARGUMENTS\n ********************************************/\nargument \"traces_forward_to\" + {\n\toptional = false\n}\n\nargument \"logs_forward_to\" {\n\toptional = false\n}\n\nargument + \"metrics_forward_to\" {\n\toptional = false\n}\n\nargument \"cluster\" {\n\toptional + = true\n\tdefault = \"k3d-k3s-codelab\"\n}\n\nargument \"otlp_http_endpoint\" + {\n\toptional = true\n\tdefault = \"0.0.0.0:4318\"\n}\n\nargument \"otlp_grpc_endpoint\" + {\n\toptional = true\n\tdefault = \"0.0.0.0:4317\"\n}\n\n/********************************************\n + * EXPORTS\n ********************************************/\nexport \"agent_traces_input\" + {\n\tvalue = otelcol.processor.batch.default.input\n}\n\n/********************************************\n + * Jaeger for Metrics Logs Traces\n ********************************************/\n\notelcol.receiver.jaeger + \"default\" {\n\tprotocols {\n\t\tgrpc {\n\t\t\tendpoint = \"0.0.0.0:14250\"\n\t\t}\n\n\t\tthrift_http + {\n\t\t\tendpoint = \"0.0.0.0:14268\"\n\t\t}\n\n\t\tthrift_binary {\n\t\t\tendpoint + = \"0.0.0.0:6832\"\n\t\t}\n\n\t\tthrift_compact {\n\t\t\tendpoint = \"0.0.0.0:6831\"\n\t\t}\n\t}\n\n\toutput + {\n\t\tmetrics = [otelcol.processor.batch.default.input]\n\t\tlogs = [otelcol.processor.resourcedetection.default.input]\n\t\ttraces + \ = [otelcol.processor.resourcedetection.default.input]\n\t}\n}\n\n/********************************************\n + * Otelcol for Metrics Logs Traces\n ********************************************/\n// + https://grafana.com/docs/agent/latest/flow/reference/components/otelcol.receiver.otlp/\notelcol.receiver.otlp + \"default\" {\n\tgrpc {\n\t\tendpoint = argument.otlp_grpc_endpoint.value\n\t}\n\n\thttp + {\n\t\tendpoint = argument.otlp_http_endpoint.value\n\t}\n\n\toutput {\n\t\tmetrics + = [otelcol.processor.batch.default.input]\n\t\tlogs = [otelcol.processor.resourcedetection.default.input]\n\t\ttraces + \ = 
[\n\t\t\totelcol.processor.resourcedetection.default.input,\n\t\t\totelcol.connector.spanlogs.autologging.input,\n\t\t]\n\t}\n}\n\notelcol.processor.resourcedetection + \"default\" {\n\tdetectors = [\"env\"]\n\n\toutput {\n\t\tlogs = [otelcol.processor.k8sattributes.default.input]\n\t\ttraces + = [otelcol.processor.k8sattributes.default.input]\n\t}\n}\n\notelcol.processor.k8sattributes + \"default\" {\n\textract {\n\t\tmetadata = [\n\t\t\t\"k8s.namespace.name\",\n\t\t\t\"k8s.pod.name\",\n\t\t\t\"k8s.deployment.name\",\n\t\t\t\"k8s.statefulset.name\",\n\t\t\t\"k8s.daemonset.name\",\n\t\t\t\"k8s.cronjob.name\",\n\t\t\t\"k8s.job.name\",\n\t\t\t\"k8s.node.name\",\n\t\t\t\"k8s.pod.uid\",\n\t\t\t\"k8s.pod.start_time\",\n\t\t]\n\t}\n\n\tpod_association + {\n\t\tsource {\n\t\t\tfrom = \"connection\"\n\t\t}\n\t}\n\n\toutput {\n\t\tlogs + \ = [otelcol.processor.transform.add_resource_attributes.input]\n\t\ttraces = + [otelcol.processor.transform.add_resource_attributes.input]\n\t}\n}\n\notelcol.processor.transform + \"add_resource_attributes\" {\n\terror_mode = \"ignore\"\n\n\tlog_statements {\n\t\tcontext + \ = \"resource\"\n\t\tstatements = [\n\t\t\t`set(attributes[\"pod\"], attributes[\"k8s.pod.name\"])`,\n\t\t\t`set(attributes[\"namespace\"], + attributes[\"k8s.namespace.name\"])`,\n\t\t\t`set(attributes[\"loki.resource.labels\"], + \"pod, namespace, cluster, job\")`,\n\t\t\t`set(attributes[\"k8s.cluster.name\"], + \"k3d-k3s-codelab\") where attributes[\"k8s.cluster.name\"] == nil`,\n\t\t]\n\t}\n\n\ttrace_statements + {\n\t\tcontext = \"resource\"\n\t\tstatements = [\n\t\t\t`set(attributes[\"k8s.cluster.name\"], + \"k3d-k3s-codelab\") where attributes[\"k8s.cluster.name\"] == nil`,\n\t\t]\n\t}\n\n\toutput + {\n\t\tlogs = [otelcol.processor.filter.default.input]\n\t\ttraces = [otelcol.processor.filter.default.input]\n\t}\n}\n\notelcol.processor.filter + \"default\" {\n\terror_mode = \"ignore\"\n\n\toutput {\n\t\tlogs = [otelcol.processor.batch.default.input]\n\t\ttraces + 
= [otelcol.processor.batch.default.input]\n\t}\n}\n\notelcol.processor.batch \"default\" + {\n\tsend_batch_size = 16384\n\tsend_batch_max_size = 0\n\ttimeout = + \"5s\"\n\n\toutput {\n\t\tmetrics = [otelcol.processor.memory_limiter.default.input]\n\t\tlogs + \ = [otelcol.processor.memory_limiter.default.input]\n\t\ttraces = [otelcol.processor.memory_limiter.default.input]\n\t}\n}\n\notelcol.processor.memory_limiter + \"default\" {\n\tcheck_interval = \"1s\"\n\tlimit_percentage = 50\n\tspike_limit_percentage + = 30\n\n\toutput {\n\t\tmetrics = [otelcol.exporter.prometheus.tracesmetrics.input]\n\t\tlogs + \ = [otelcol.exporter.loki.traceslogs.input]\n\t\ttraces = argument.traces_forward_to.value\n\t}\n}\n\notelcol.exporter.prometheus + \"tracesmetrics\" {\n\tforward_to = argument.metrics_forward_to.value\n}\n\notelcol.exporter.loki + \"traceslogs\" {\n\tforward_to = [loki.process.traceslogs.receiver]\n}\n\n// The + OpenTelemetry spanlog connector processes incoming trace spans and extracts data + from them ready\n// for logging.\notelcol.connector.spanlogs \"autologging\" {\n\t// + We only want to output a line for each root span (ie. 
every single trace), and + not for every\n\t// process or span (outputting a line for every span would be + extremely verbose).\n\tspans = false\n\troots = true\n\tprocesses = false\n\n\t// + We want to ensure that the following three span attributes are included in the + log line, if present.\n\tspan_attributes = [\n\t\t\"http.method\",\n\t\t\"http.target\",\n\t\t\"http.status_code\",\n\t]\n\n\t// + Overrides the default key in the log line to be `traceId`, which is then used + by Grafana to\n\t// identify the trace ID for correlation with the Tempo datasource.\n\toverrides + {\n\t\ttrace_id_key = \"traceId\"\n\t}\n\n\t// Send to the OpenTelemetry Loki + exporter.\n\toutput {\n\t\tlogs = [otelcol.exporter.loki.autologging.input]\n\t}\n}\n\n// + Simply forwards the incoming OpenTelemetry log format out as a Loki log.\n// We + need this stage to ensure we can then process the logline as a Loki object.\notelcol.exporter.loki + \"autologging\" {\n\tforward_to = [loki.process.autologging.receiver]\n}\n\n// + The Loki processor allows us to accept a correctly formatted Loki log and mutate + it into\n// a set of fields for output.\nloki.process \"autologging\" {\n\t// + The JSON stage simply extracts the `body` (the actual logline) from the Loki log, + ignoring\n\t// all other fields.\n\tstage.json {\n\t\texpressions = {\"body\" + = \"\"}\n\t}\n\t// The output stage takes the body (the main logline) and uses + this as the source for the output\n\t// logline. 
In this case, it essentially + turns it into logfmt.\n\tstage.output {\n\t\tsource = \"body\"\n\t}\n\n\tforward_to + = [loki.process.traceslogs.receiver]\n}\n\nloki.process \"traceslogs\" {\n\tstage.tenant + {\n\t\tvalue = \"anonymous\"\n\t}\n\n\tforward_to = argument.logs_forward_to.value\n}\n" kind: ConfigMap metadata: - name: agent-config + name: agent-modules-cf8t5bf7t9 namespace: monitoring-system --- apiVersion: v1 data: - datasources.yaml: | - apiVersion: 1 + alertmanager_fallback_config.yaml: | + route: + group_wait: 0s + receiver: empty-receiver + + receivers: + # In this example we're not going to send any notification out of Alertmanager. + - name: 'empty-receiver' + mimir.yaml: | + # Do not use this configuration in production. + # It is for demonstration purposes only. + multitenancy_enabled: false + + # -usage-stats.enabled=false + usage_stats: + enabled: false + + server: + http_listen_port: 8080 + grpc_listen_port: 9095 + log_level: info + + # https://grafana.com/docs/mimir/latest/references/configuration-parameters/#use-environment-variables-in-the-configuration + common: + storage: + backend: s3 + s3: + endpoint: ${MIMIR_S3_ENDPOINT:minio.minio-system.svc:443} + access_key_id: ${MIMIR_S3_ACCESS_KEY_ID:lgtmp} + secret_access_key: ${MIMIR_S3_SECRET_ACCESS_KEY:supersecret} + insecure: ${MIMIR_S3_INSECURE:false} + http: + insecure_skip_verify: true + + alertmanager: + data_dir: /data/alertmanager + enable_api: true + external_url: /alertmanager + fallback_config_file: /etc/mimir/alertmanager_fallback_config.yaml + alertmanager_storage: + s3: + bucket_name: mimir-alertmanager + + + memberlist: + join_members: [ mimir-memberlist:7946 ] + + ingester: + ring: + replication_factor: 1 + + store_gateway: + sharding_ring: + replication_factor: 1 + + + blocks_storage: + s3: + bucket_name: mimir-blocks + tsdb: + dir: /data/ingester + ship_interval: 1m + block_ranges_period: [ 2h ] + retention_period: 3h + bucket_store: + index_cache: + backend: memcached + 
memcached: + addresses: dns+memcached.memcached-system.svc:11211 + + chunks_cache: + backend: memcached + memcached: + addresses: dns+memcached.memcached-system.svc:11211 + + metadata_cache: + backend: memcached + memcached: + addresses: dns+memcached.memcached-system.svc:11211 + + ruler: + rule_path: /data/rules + enable_api: true + alertmanager_url: http://localhost:8080/alertmanager + ruler_storage: + s3: + bucket_name: mimir-ruler + cache: + backend: memcached + memcached: + addresses: dns+memcached.memcached-system.svc:11211 - deleteDatasources: - - name: Profiles - uid: profiles + compactor: + compaction_interval: 30s + data_dir: /data/mimir-compactor + cleanup_interval: 1m + tenant_cleanup_delay: 1m - datasources: - # Pyroscope for profiles - - name: Profiles - type: grafana-pyroscope-datasource - uid: profiles - access: proxy - url: http://nginx.gateway.svc.cluster.local:4040 - basicAuth: false - isDefault: true - version: 1 - editable: true + limits: + native_histograms_ingestion_enabled: true + + overrides_exporter: + ring: + enabled: true + wait_stability_min_duration: 30s + + runtime_config: + file: /etc/mimir/runtime.yaml + runtime.yaml: |- + # This file can be used to set overrides or other runtime config. 
+ ingester_limits: # limits that each ingester replica enforces + max_ingestion_rate: 20000 + max_series: 1500000 + max_tenants: 1000 + max_inflight_push_requests: 30000 + + distributor_limits: # limits that each distributor replica enforces + max_ingestion_rate: 20000 + max_inflight_push_requests: 30000 + max_inflight_push_requests_bytes: 50000000 + + overrides: + anonymous: # limits for anonymous that the whole cluster enforces + # ingestion_tenant_shard_size: 9 + max_global_series_per_user: 1500000 + max_fetched_series_per_query: 100000 + native_histograms_ingestion_enabled: true + ruler_max_rules_per_rule_group: 50 kind: ConfigMap metadata: labels: - grafana_datasource: "1" - name: grafana-datasources-t756b6d8cg + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/version: 2.11.0 + name: mimir-config-958c4gm5k9 namespace: monitoring-system --- apiVersion: v1 @@ -158,6 +905,51 @@ metadata: namespace: profiles-system --- apiVersion: v1 +data: + memcached-address: bWVtY2FjaGVkLm1lbWNhY2hlZC1zeXN0ZW0uc3ZjLmNsdXN0ZXIubG9jYWw6MTEyMTE= +kind: Secret +metadata: + name: integrations-memcached + namespace: monitoring-system +type: Opaque +--- +apiVersion: v1 +data: + mysql-host: bXlzcWwubXlzcWwtc3lzdGVtLnN2Yy5jbHVzdGVyLmxvY2Fs + mysql-password: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= + mysql-username: bGd0bXA= +kind: Secret +metadata: + name: integrations-mysql + namespace: monitoring-system +type: Opaque +--- +apiVersion: v1 +data: + redis-addr: cmVkaXMtbWFzdGVyLnJlZGlzLXN5c3RlbS5zdmMuY2x1c3Rlci5sb2NhbDo2Mzc5 + redis-password: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= +kind: Secret +metadata: + name: integrations-redis + namespace: monitoring-system +type: Opaque +--- +apiVersion: v1 +data: + MIMIR_S3_SECRET_ACCESS_KEY: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= +kind: Secret +metadata: + labels: + app.kubernetes.io/component: mimir 
+ app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/version: 2.11.0 + name: mimir-env-92ddctt858 + namespace: monitoring-system +type: Opaque +--- +apiVersion: v1 data: PYROSCOPE_STORAGE_S3_SECRET_ACCESS_KEY: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= kind: Secret @@ -168,6 +960,133 @@ type: Opaque --- apiVersion: v1 kind: Service +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +spec: + internalTrafficPolicy: Cluster + ports: + - name: http-metrics + port: 80 + protocol: TCP + targetPort: 80 + - name: grpc-otlp + port: 4317 + protocol: TCP + targetPort: 4317 + - name: http-otlp + port: 4318 + protocol: TCP + targetPort: 4318 + - name: zipkin + port: 9411 + protocol: TCP + targetPort: 9411 + - name: jaeger-compact + port: 6831 + protocol: UDP + targetPort: 6831 + selector: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + type: ClusterIP +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent-cluster + namespace: monitoring-system +spec: + clusterIP: None + ports: + - name: http + port: 80 + protocol: TCP + targetPort: 80 + - name: grpc-otlp + port: 4317 + protocol: TCP + targetPort: 4317 + - name: http-otlp + port: 4318 + protocol: TCP + targetPort: 4318 + - name: zipkin + port: 9411 + protocol: TCP + targetPort: 9411 + - name: jaeger-compact + port: 6831 + protocol: UDP + targetPort: 6831 + selector: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + type: ClusterIP 
+--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/version: 2.11.0 + name: mimir + namespace: monitoring-system +spec: + ports: + - name: http-metrics + port: 8080 + - name: grpc-distribut + port: 9095 + selector: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/name: mimir +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/version: 2.11.0 + prometheus.io/service-monitor: "false" + name: mimir-memberlist + namespace: monitoring-system +spec: + clusterIP: None + ports: + - appProtocol: tcp + name: tcp-gossip-ring + port: 7946 + protocol: TCP + targetPort: 7946 + publishNotReadyAddresses: true + selector: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/name: mimir + app.kubernetes.io/part-of: memberlist +--- +apiVersion: v1 +kind: Service metadata: labels: app.kubernetes.io/component: all @@ -200,6 +1119,7 @@ metadata: app.kubernetes.io/name: pyroscope app.kubernetes.io/version: 1.5.0 helm.sh/chart: pyroscope-1.5.0 + prometheus.io/service-monitor: "false" name: pyroscope-headless namespace: profiles-system spec: @@ -240,6 +1160,92 @@ spec: type: ClusterIP --- apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/part-of: memberlist + app.kubernetes.io/version: 2.11.0 + name: mimir + namespace: monitoring-system +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/component: 
mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/name: mimir + app.kubernetes.io/part-of: memberlist + template: + metadata: + annotations: + logs.agent.grafana.com/scrape: "true" + logs.agent.grafana.com/scrub-level: info + profiles.grafana.com/cpu.port_name: http-metrics + profiles.grafana.com/cpu.scrape: "false" + profiles.grafana.com/goroutine.port_name: http-metrics + profiles.grafana.com/goroutine.scrape: "false" + profiles.grafana.com/memory.port_name: http-metrics + profiles.grafana.com/memory.scrape: "false" + pyroscope.io/service_name: mimir + labels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/name: mimir + app.kubernetes.io/part-of: memberlist + spec: + containers: + - args: + - -target=all + - -config.expand-env=true + - -config.file=/etc/mimir/mimir.yaml + - -memberlist.bind-addr=$(POD_IP) + env: + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + envFrom: + - secretRef: + name: mimir-env-92ddctt858 + image: docker.io/grafana/mimir:2.11.0 + imagePullPolicy: IfNotPresent + name: mimir + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc-distribut + - containerPort: 7946 + name: http-memberlist + readinessProbe: + httpGet: + path: /ready + port: http-metrics + resources: + limits: + cpu: 999m + memory: 1Gi + requests: + cpu: 10m + memory: 55Mi + volumeMounts: + - mountPath: /etc/mimir + name: config + - mountPath: /data + name: storage + terminationGracePeriodSeconds: 60 + volumes: + - configMap: + name: mimir-config-958c4gm5k9 + name: config + - emptyDir: {} + name: storage +--- +apiVersion: apps/v1 kind: StatefulSet metadata: labels: @@ -351,3 +1357,250 @@ spec: app.kubernetes.io/component: all app.kubernetes.io/instance: pyroscope app.kubernetes.io/name: pyroscope +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: 
Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +spec: + minReadySeconds: 10 + selector: + matchLabels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + template: + metadata: + annotations: + kubectl.kubernetes.io/default-container: grafana-agent + logs.agent.grafana.com/scrape: "true" + logs.agent.grafana.com/scrub-level: debug + profiles.grafana.com/cpu.port_name: http-metrics + profiles.grafana.com/cpu.scrape: "false" + profiles.grafana.com/goroutine.port_name: http-metrics + profiles.grafana.com/goroutine.scrape: "false" + profiles.grafana.com/memory.port_name: http-metrics + profiles.grafana.com/memory.scrape: "false" + pyroscope.io/service_name: grafana-agent + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + spec: + containers: + - args: + - run + - /etc/agent/config.river + - --storage.path=/tmp/agent + - --server.http.listen-addr=0.0.0.0:80 + - --server.http.ui-path-prefix=/ + - --disable-reporting + - --cluster.enabled=true + - --cluster.join-addresses=grafana-agent-cluster + env: + - name: AGENT_MODE + value: flow + - name: AGENT_DEPLOY_MODE + value: helm + - name: HOSTNAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + envFrom: + - secretRef: + name: agent-env + optional: true + image: docker.io/grafana/agent:v0.40.3 + imagePullPolicy: IfNotPresent + name: grafana-agent + ports: + - containerPort: 80 + name: http-metrics + - containerPort: 4317 + name: grpc-otlp + protocol: TCP + - containerPort: 4318 + name: http-otlp + protocol: TCP + - containerPort: 9411 + name: zipkin + protocol: TCP + - containerPort: 6831 + name: jaeger-compact + protocol: UDP + readinessProbe: + httpGet: + path: /-/ready + port: 80 + scheme: HTTP + initialDelaySeconds: 10 + timeoutSeconds: 1 + volumeMounts: + - mountPath: /etc/agent + name: config + - mountPath: 
/var/log + name: varlog + readOnly: true + - mountPath: /etc/agent-modules + name: agent-modules + - args: + - --volume-dir=/etc/agent + - --webhook-url=http://localhost:80/-/reload + image: ghcr.io/jimmidyson/configmap-reload:v0.12.0 + name: config-reloader + resources: + requests: + cpu: 1m + memory: 5Mi + volumeMounts: + - mountPath: /etc/agent + name: config + dnsPolicy: ClusterFirst + nodeSelector: + kubernetes.io/os: linux + serviceAccountName: grafana-agent + volumes: + - configMap: + name: agent-config-52gfhcfbb4 + name: config + - hostPath: + path: /var/log + name: varlog + - configMap: + name: agent-modules-cf8t5bf7t9 + name: agent-modules + updateStrategy: + rollingUpdate: + maxSurge: 0 + maxUnavailable: 2 + type: RollingUpdate +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +spec: + endpoints: + - honorLabels: true + port: http-metrics + scheme: http + selector: + matchLabels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/version: 2.11.0 + name: mimir + namespace: monitoring-system +spec: + endpoints: + - port: http-metrics + relabelings: + - replacement: monitoring-system/mimir + sourceLabels: + - job + targetLabel: job + scheme: http + namespaceSelector: + matchNames: + - monitoring-system + selector: + matchExpressions: + - key: prometheus.io/service-monitor + operator: NotIn + values: + - "false" + matchLabels: + app.kubernetes.io/component: mimir + 
app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/name: mimir +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + app.kubernetes.io/component: all + app.kubernetes.io/instance: pyroscope + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: pyroscope + app.kubernetes.io/version: 1.5.0 + helm.sh/chart: pyroscope-1.5.0 + name: pyroscope + namespace: profiles-system +spec: + endpoints: + - port: http2 + relabelings: + - action: replace + replacement: profiles-system/pyroscope + sourceLabels: + - job + targetLabel: job + scheme: http + namespaceSelector: + matchNames: + - profiles-system + selector: + matchExpressions: + - key: prometheus.io/service-monitor + operator: NotIn + values: + - "false" + matchLabels: + app.kubernetes.io/component: all + app.kubernetes.io/instance: pyroscope + app.kubernetes.io/name: pyroscope +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +spec: + rules: + - host: grafana-agent.localhost + http: + paths: + - backend: + service: + name: grafana-agent + port: + number: 80 + path: / + pathType: Prefix + - host: agent.localhost + http: + paths: + - backend: + service: + name: grafana-agent + port: + number: 80 + path: / + pathType: Prefix diff --git a/kubernetes/monolithic-mode/profiles/kustomization.yaml b/kubernetes/monolithic-mode/profiles/kustomization.yaml index aee60fad..0b50aa5a 100644 --- a/kubernetes/monolithic-mode/profiles/kustomization.yaml +++ b/kubernetes/monolithic-mode/profiles/kustomization.yaml @@ -8,20 +8,15 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: +- ../../common/grafana-agent - pyroscope +# optional +- ../metrics/mimir + configMapGenerator: - name: 
agent-config namespace: monitoring-system - options: - disableNameSuffixHash: true + behavior: replace files: - configs/config.river - -- name: grafana-datasources - namespace: monitoring-system - options: - labels: - grafana_datasource: "1" - files: - - datasources.yaml=configs/grafana-datasources-pyroscope.yaml diff --git a/kubernetes/monolithic-mode/profiles/pyroscope/values-k3d-k3s.yaml b/kubernetes/monolithic-mode/profiles/pyroscope/values-k3d-k3s.yaml index 858d24c5..a6aef30e 100644 --- a/kubernetes/monolithic-mode/profiles/pyroscope/values-k3d-k3s.yaml +++ b/kubernetes/monolithic-mode/profiles/pyroscope/values-k3d-k3s.yaml @@ -4,6 +4,18 @@ agent: minio: enabled: false +serviceMonitor: + enabled: true + namespaceSelector: + matchNames: + - profiles-system + relabelings: + - action: replace + replacement: profiles-system/pyroscope + sourceLabels: + - job + targetLabel: job + pyroscope: extraArgs: log.level: debug diff --git a/kubernetes/monolithic-mode/traces/configs/config.river b/kubernetes/monolithic-mode/traces/configs/config.river index 0fde764f..976a01a9 100644 --- a/kubernetes/monolithic-mode/traces/configs/config.river +++ b/kubernetes/monolithic-mode/traces/configs/config.river @@ -8,6 +8,9 @@ logging { format = "logfmt" } +/******************************************** + * Grafana LGTMP Stack Receiver Provider + ********************************************/ module.file "lgtmp" { filename = coalesce(env("AGENT_CONFIG_FOLDER"), "/etc/agent-modules") + "/lgtmp.river" @@ -34,11 +37,6 @@ module.file "traces_primary" { } } -tracing { - sampling_fraction = 0.8 - write_to = [module.file.traces_primary.exports.agent_traces_input] -} - /******************************************** * Metrics ********************************************/ diff --git a/kubernetes/monolithic-mode/traces/configs/grafana-datasources-tempo.yaml b/kubernetes/monolithic-mode/traces/configs/grafana-datasources-tempo.yaml deleted file mode 100644 index ebe3c764..00000000 --- 
a/kubernetes/monolithic-mode/traces/configs/grafana-datasources-tempo.yaml +++ /dev/null @@ -1,62 +0,0 @@ -apiVersion: 1 - -deleteDatasources: -- name: Metrics - uid: metrics -- name: Traces - uid: traces - -datasources: -# Mimir for metrics -- name: Metrics - type: prometheus - uid: metrics - access: proxy - orgId: 1 - url: http://nginx.gateway.svc.cluster.local:8080/prometheus - basicAuth: false - isDefault: false - version: 1 - editable: true - jsonData: - prometheusType: Mimir - exemplarTraceIdDestinations: - - name: traceID - datasourceUid: traces - -# Tempo for traces -- name: Traces - type: tempo - access: proxy - uid: traces - url: http://nginx.gateway.svc.cluster.local:3200 - basicAuth: false - isDefault: true - version: 1 - editable: true - jsonData: - search: - hide: false - nodeGraph: - enabled: true - serviceMap: - datasourceUid: metrics - traceQuery: - timeShiftEnabled: true - spanStartTimeShift: '-30m' - spanEndTimeShift: '30m' - spanBar: - type: 'Tag' - tag: 'http.path' - tracesToMetrics: - datasourceUid: metrics - spanStartTimeShift: '-30m' - spanEndTimeShift: '30m' - tags: [{ key: 'service.name', value: 'service' }, { key: 'span_name' }, { key: 'http_method' }] - queries: - - name: '(R) Rate' - query: 'sum(rate(traces_spanmetrics_calls_total{$$__tags}[$$__rate_interval]))' - - name: '(E) Error Rate' - query: 'sum(rate(traces_spanmetrics_calls_total{$$__tags, status_code="STATUS_CODE_ERROR"}[$$__rate_interval]))' - - name: '(D) Duration' - query: 'histogram_quantile(0.9, sum(rate(traces_spanmetrics_latency_bucket{$$__tags}[$$__rate_interval])) by (le))' diff --git a/kubernetes/monolithic-mode/traces/k8s-all-in-one.yaml b/kubernetes/monolithic-mode/traces/k8s-all-in-one.yaml index b161e6d7..83de150e 100644 --- a/kubernetes/monolithic-mode/traces/k8s-all-in-one.yaml +++ b/kubernetes/monolithic-mode/traces/k8s-all-in-one.yaml @@ -5,6 +5,18 @@ metadata: --- apiVersion: v1 kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/instance: 
grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +--- +apiVersion: v1 +kind: ServiceAccount metadata: labels: app.kubernetes.io/component: mimir @@ -28,6 +40,127 @@ metadata: name: tempo namespace: tracing-system --- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent +rules: +- apiGroups: + - "" + - discovery.k8s.io + - networking.k8s.io + resources: + - endpoints + - endpointslices + - ingresses + - nodes + - nodes/proxy + - nodes/metrics + - pods + - services + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - pods + - pods/log + - namespaces + verbs: + - get + - list + - watch +- apiGroups: + - monitoring.grafana.com + resources: + - podlogs + verbs: + - get + - list + - watch +- apiGroups: + - monitoring.coreos.com + resources: + - prometheusrules + verbs: + - get + - list + - watch +- nonResourceURLs: + - /metrics + verbs: + - get +- apiGroups: + - monitoring.coreos.com + resources: + - podmonitors + - servicemonitors + - probes + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - events + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - configmaps + - secrets + verbs: + - get + - list + - watch +- apiGroups: + - apps + resources: + - replicasets + verbs: + - get + - list + - watch +- apiGroups: + - extensions + resources: + - replicasets + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent 
+ app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: grafana-agent +subjects: +- kind: ServiceAccount + name: grafana-agent + namespace: monitoring-system +--- apiVersion: v1 data: gateway_mimir.conf.template: "server {\n listen 8080;\n listen [::]:8080;\n\n @@ -77,865 +210,15 @@ metadata: namespace: gateway --- apiVersion: v1 -data: - agent-cluster-node.json: |- - { - "annotations": { - "list": [ - { - "datasource": "$loki_datasource", - "enable": true, - "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", - "iconColor": "rgba(0, 211, 255, 1)", - "instant": false, - "name": "Deployments", - "titleFormat": "{{cluster}}/{{namespace}}" - } - ] - }, - "graphTooltip": 1, - "links": [ - { - "icon": "doc", - "targetBlank": true, - "title": "Documentation", - "tooltip": "Clustering documentation", - "type": "link", - "url": "https://grafana.com/docs/agent/latest/flow/reference/cli/run/#clustered-mode-experimental" - }, - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "title": "Node Info", - "type": "row" - }, - { - "datasource": "${datasource}", - "description": "Information about a specific cluster node.\n\n* Lamport clock time: The observed Lamport time on the specific node's clock used to provide partial ordering around gossip messages. Nodes should ideally be observing roughly the same time, meaning they are up-to-date on the cluster state. 
If a node is falling behind, it means that it has not recently processed the same number of messages and may have an outdated view of its peers.\n\n* Internal cluster state observers: The number of Observer functions that are registered to run whenever the node detects a cluster change.\n\n* Gossip health score: A health score assigned to this node by the memberlist implementation. The lower, the better.\n\n* Gossip protocol version: The protocol version used by nodes to communicate with one another. It should match across all nodes.\n", - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 1 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(cluster_node_lamport_time{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "Lamport clock time" - }, - { - "datasource": "${datasource}", - "expr": "sum(cluster_node_update_observers{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "Internal cluster state observers" - }, - { - "datasource": "${datasource}", - "expr": "sum(cluster_node_gossip_health_score{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "Gossip health score" - }, - { - "datasource": "${datasource}", - "expr": "sum(cluster_node_gossip_proto_version{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "Gossip protocol version" - } - ], - "title": "Node Info", - "transformations": [ - { - "id": "renameByRegex", - "options": { - "regex": "Value #(.*)", - "renamePattern": "$1" - } - }, - { - "id": "reduce", - "options": { } - }, - { - "id": "organize", - "options": { 
- "excludeByName": { }, - "indexByName": { }, - "renameByName": { - "Field": "Metric", - "Max": "Value" - } - } - } - ], - "type": "table" - }, - { - "datasource": "${datasource}", - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 1 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(cluster_node_gossip_received_events_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", - "instant": false, - "legendFormat": "{{event}}", - "range": true - } - ], - "title": "Gossip ops/s", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Known peers to the node (including the local node).\n", - "fieldConfig": { - "defaults": { - "unit": "suffix:peers" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 9 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(cluster_node_peers{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", - "instant": false, - "legendFormat": "__auto", - "range": true - } - ], - "title": "Known peers", - "type": "stat" - }, - { - "datasource": "${datasource}", - "description": "Known peers to the node by state (including the local node).\n", - "fieldConfig": { - "defaults": { - "unit": "suffix:nodes" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 9 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "cluster_node_peers{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}", - "instant": false, - "legendFormat": "{{state}}", - "range": true - } - ], - "title": "Peers by state", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 17 - }, - "title": "Gossip Transport", - "type": "row" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "axisCenteredZero": true - }, - "unit": "Bps" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 18 - }, - 
"targets": [ - { - "datasource": "${datasource}", - "expr": "rate(cluster_transport_rx_bytes_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", - "instant": false, - "legendFormat": "rx", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "-1 * rate(cluster_transport_tx_bytes_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", - "instant": false, - "legendFormat": "tx", - "range": true - } - ], - "title": "Transport bandwidth", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "unit": "percentunit" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 18 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "1 - (\nrate(cluster_transport_tx_packets_failed_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) /\nrate(cluster_transport_tx_packets_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n)\n", - "instant": false, - "legendFormat": "Tx success %", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "1 - (\n rate(cluster_transport_rx_packets_failed_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) /\n rate(cluster_transport_rx_packets_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n )\n", - "instant": false, - "legendFormat": "Rx success %", - "range": true - } - ], - "title": "Packet write success rate", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "The number of packets enqueued currently to be decoded or encoded and sent during communication with other nodes.\n\nThe incoming and outgoing packet queue should be as empty as possible; a growing queue means that the Agent cannot keep up with the number of messages required to have all nodes 
informed of cluster changes, and the nodes may not converge in a timely fashion.\n", - "fieldConfig": { - "defaults": { - "unit": "pkts" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 18 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "cluster_transport_tx_packet_queue_length{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}", - "instant": false, - "legendFormat": "tx queue", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "cluster_transport_rx_packet_queue_length{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}", - "instant": false, - "legendFormat": "rx queue", - "range": true - } - ], - "title": "Pending packet queue", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "axisCenteredZero": true - }, - "unit": "Bps" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 26 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(cluster_transport_stream_rx_bytes_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", - "instant": false, - "legendFormat": "rx", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "-1 * rate(cluster_transport_stream_tx_bytes_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", - "instant": false, - "legendFormat": "tx", - "range": true - } - ], - "title": "Stream bandwidth", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "unit": "percentunit" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 26 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "1 - (\n rate(cluster_transport_stream_tx_packets_failed_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) /\n 
rate(cluster_transport_stream_tx_packets_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n )\n", - "instant": false, - "legendFormat": "Tx success %", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "1 - (\n rate(cluster_transport_stream_rx_packets_failed_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) /\n rate(cluster_transport_stream_rx_packets_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n )\n", - "instant": false, - "legendFormat": "Rx success %", - "range": true - } - ], - "title": "Stream write success rate", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "The number of open connections from this node to its peers.\n\nEach node picks up a subset of its peers to continuously gossip messages around cluster status using streaming HTTP/2 connections. This panel can be used to detect networking failures that result in cluster communication being disrupted and convergence taking longer than expected or outright failing.\n", - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 26 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "cluster_transport_streams{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}", - "instant": false, - "legendFormat": "Open streams", - "range": true - } - ], - "title": "Open transport streams", - "type": "timeseries" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": "cluster", - 
"query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": "namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "instance", - "name": "instance", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n", - "refId": "instance" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - "timezone": "utc", - "title": "Grafana Agent Flow / Cluster Node", - "uid": "dd370cd333b2d9258435fb1b5a20a89b" - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin - labels: - grafana_dashboard: "1" - name: agent-cluster-node.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - agent-cluster-overview.json: |- - { - "annotations": { - "list": [ - { - "datasource": "$loki_datasource", - "enable": true, - "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", - "iconColor": "rgba(0, 211, 255, 1)", - "instant": false, - "name": "Deployments", - "titleFormat": "{{cluster}}/{{namespace}}" - } - ] - }, - "graphTooltip": 1, - "links": [ - { - "icon": "doc", - "targetBlank": true, - "title": "Documentation", - "tooltip": "Clustering documentation", - "type": 
"link", - "url": "https://grafana.com/docs/agent/latest/flow/reference/cli/run/#clustered-mode-experimental" - }, - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "${datasource}", - "gridPos": { - "h": 9, - "w": 8, - "x": 0, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "count(cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"})", - "instant": true, - "legendFormat": "__auto", - "range": false - } - ], - "title": "Nodes", - "type": "stat" - }, - { - "datasource": "${datasource}", - "description": "Nodes info.\n", - "fieldConfig": { - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Dashboard" - }, - "properties": [ - { - "id": "mappings", - "value": [ - { - "options": { - "1": { - "index": 0, - "text": "Link" - } - }, - "type": "value" - } - ] - }, - { - "id": "links", - "value": [ - { - "targetBlank": false, - "title": "Detail dashboard for node", - "url": "/d/dd370cd333b2d9258435fb1b5a20a89b/grafana-agent-flow-cluster-node?var-instance=${__data.fields.instance}&var-datasource=${datasource}&var-loki_datasource=${loki_datasource}&var-cluster=${cluster}&var-namespace=${namespace}" - } - ] - } - ] - } - ] - }, - "gridPos": { - "h": 9, - "w": 16, - "x": 8, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"}", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false - } - ], - "title": "Node table", - "transformations": [ - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true, - "Value": false, - "__name__": true, - "cluster": true, - "namespace": true, - "state": false - }, - "indexByName": { }, - "renameByName": { - "Value": "Dashboard", - "instance": "", - "state": 
"" - } - } - } - ], - "type": "table" - }, - { - "datasource": "${datasource}", - "description": "Whether the cluster state has converged.\n\nIt is normal for the cluster state to be diverged briefly as gossip events propagate. It is not normal for the cluster state to be diverged for a long period of time.\n\nThis will show one of the following:\n\n* Converged: Nodes are aware of all other nodes, with the correct states.\n* Not converged: A subset of nodes aren't aware of their peers, or don't have an updated view of peer states.\n", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "1": { - "color": "red", - "index": 1, - "text": "Not converged" - } - }, - "type": "value" - }, - { - "options": { - "match": "null", - "result": { - "color": "green", - "index": 0, - "text": "Converged" - } - }, - "type": "special" - } - ], - "unit": "suffix:nodes" - } - }, - "gridPos": { - "h": 9, - "w": 8, - "x": 0, - "y": 9 - }, - "options": { - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "clamp((\n sum(stddev by (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"}) != 0) or\n (sum(abs(sum without (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"})) - scalar(count(cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"})) != 0))\n ),\n 1, 1\n)\n", - "format": "time_series", - "instant": true, - "legendFormat": "__auto", - "range": false - } - ], - "title": "Convergance state", - "type": "stat" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 80, - "spanNulls": true - }, - "mappings": [ - { - "options": { - "0": { - "color": "green", - "text": "Yes" - } - }, - "type": "value" - }, - { - "options": { - "1": { - 
"color": "red", - "text": "No" - } - }, - "type": "value" - } - ], - "max": 1, - "noValue": 0 - } - }, - "gridPos": { - "h": 9, - "w": 16, - "x": 8, - "y": 9 - }, - "options": { - "mergeValues": true - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "ceil(clamp((\n sum(stddev by (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"})) or\n (sum(abs(sum without (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"})) - scalar(count(cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"}))))\n ),\n 0, 1\n))\n", - "instant": false, - "legendFormat": "Converged", - "range": true - } - ], - "title": "Convergance state timeline", - "type": "state-timeline" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": "cluster", - "query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": "namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - "timezone": "utc", - 
"title": "Grafana Agent Flow / Cluster Overview", - "uid": "7e07f9c975fcfc2a6e120a95f579f843" - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin - labels: - grafana_dashboard: "1" - name: agent-cluster-overview.json - namespace: monitoring-system ---- -apiVersion: v1 data: config.river: "/*\nThe following example shows using the default all logs processing module, for\na single tenant and specifying the destination url/credentials via environment\nvariables.\n*/\nlogging {\n\tlevel = coalesce(env(\"AGENT_LOG_LEVEL\"), - \"info\")\n\tformat = \"logfmt\"\n}\n\nmodule.file \"lgtmp\" {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), - \"/etc/agent-modules\") + \"/lgtmp.river\"\n\n\targuments {\n\t\tcluster = - coalesce(env(\"CLUSTER\"), \"k3d-k3s-codelab\")\n\t\tlogs_endpoint = coalesce(env(\"LOGS_ENDPOINT\"), + \"info\")\n\tformat = \"logfmt\"\n}\n\n/********************************************\n + * Grafana LGTMP Stack Receiver Provider\n ********************************************/\nmodule.file + \"lgtmp\" {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), \"/etc/agent-modules\") + + \"/lgtmp.river\"\n\n\targuments {\n\t\tcluster = coalesce(env(\"CLUSTER\"), + \"k3d-k3s-codelab\")\n\t\tlogs_endpoint = coalesce(env(\"LOGS_ENDPOINT\"), \"http://nginx.gateway.svc:3100\")\n\t\tmetrics_endpoint = coalesce(env(\"METRICS_ENDPOINT\"), \"http://nginx.gateway.svc:8080\")\n\t\tprofiles_endpoint = coalesce(env(\"PROFILES_ENDPOINT\"), \"http://nginx.gateway.svc:4040\")\n\t\ttraces_endpoint = coalesce(env(\"TRACES_ENDPOINT\"), @@ -944,8 +227,7 @@ data: {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), \"/etc/agent-modules\") + \"/traces.river\"\n\n\targuments {\n\t\tmetrics_forward_to = [module.file.lgtmp.exports.metrics_receiver]\n\t\tlogs_forward_to \ = [module.file.lgtmp.exports.logs_receiver]\n\t\ttraces_forward_to = [module.file.lgtmp.exports.traces_receiver]\n\t\tcluster - \ = coalesce(env(\"CLUSTER\"), 
\"k3d-k3s-codelab\")\n\t}\n}\n\ntracing - {\n\tsampling_fraction = 0.8\n\twrite_to = [module.file.traces_primary.exports.agent_traces_input]\n}\n\n/********************************************\n + \ = coalesce(env(\"CLUSTER\"), \"k3d-k3s-codelab\")\n\t}\n}\n\n/********************************************\n * Metrics\n ********************************************/\nmodule.file \"metrics_primary\" {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), \"/etc/agent-modules\") + \"/metrics.river\"\n\n\targuments {\n\t\tforward_to = [module.file.lgtmp.exports.metrics_receiver]\n\t\tclustering @@ -956,9365 +238,485 @@ data: \ = \"monitoring-system\"\n\t\tforward_to = [module.file.lgtmp.exports.metrics_receiver]\n\t}\n}\n" kind: ConfigMap metadata: - name: agent-config - namespace: monitoring-system ---- -apiVersion: v1 -data: - agent-flow-controller.json: |- - { - "annotations": { - "list": [ - { - "datasource": "$loki_datasource", - "enable": true, - "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", - "iconColor": "rgba(0, 211, 255, 1)", - "instant": false, - "name": "Deployments", - "titleFormat": "{{cluster}}/{{namespace}}" - } - ] - }, - "graphTooltip": 1, - "links": [ - { - "icon": "doc", - "targetBlank": true, - "title": "Documentation", - "tooltip": "Component controller documentation", - "type": "link", - "url": "https://grafana.com/docs/agent/latest/flow/concepts/component_controller/" - }, - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "${datasource}", - "description": "The number of Grafana Agent Flow instances whose metrics are being sent and reported.\n", - "fieldConfig": { - "defaults": { - "unit": "agents" - } - }, - "gridPos": { - "h": 4, - "w": 10, 
- "x": 0, - "y": 0 - }, - "options": { - "colorMode": "none", - "graphMode": "none" - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "count(agent_component_controller_evaluating{cluster=\"$cluster\", namespace=\"$namespace\"})", - "instant": false, - "legendFormat": "__auto", - "range": true - } - ], - "title": "Running agents", - "type": "stat" - }, - { - "datasource": "${datasource}", - "description": "The number of running components across all running agents.\n", - "fieldConfig": { - "defaults": { - "unit": "components" - } - }, - "gridPos": { - "h": 4, - "w": 10, - "x": 0, - "y": 4 - }, - "options": { - "colorMode": "none", - "graphMode": "none" - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"})", - "instant": false, - "legendFormat": "__auto", - "range": true - } - ], - "title": "Running components", - "type": "stat" - }, - { - "datasource": "${datasource}", - "description": "The percentage of components which are in a healthy state.\n", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "No components", - "unit": "percentunit" - } - }, - "gridPos": { - "h": 4, - "w": 10, - "x": 0, - "y": 8 - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "text": { - "valueSize": 80 - } - }, - "pluginVersion": "9.0.6", - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\",health_type=\"healthy\"}) /\nsum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"})\n", - "instant": false, - "legendFormat": "__auto", - "range": true - } - ], - "title": "Overall component health", - "type": "stat" - }, - { - "datasource": "${datasource}", - "description": "Breakdown of components by health across all running agents.\n\n* Healthy: components have been evaluated 
completely and are reporting themselves as healthy.\n* Unhealthy: Components either could not be evaluated or are reporting themselves as unhealthy.\n* Unknown: A component has been created but has not yet been started.\n* Exited: A component has exited. It will not return to the running state.\n\nMore information on a component's health state can be retrieved using\nthe Grafana Agent Flow UI.\n\nNote that components may be in a degraded state even if they report\nthemselves as healthy. Use component-specific dashboards and alerts\nto observe detailed information about the behavior of a component.\n", - "fieldConfig": { - "defaults": { - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Unhealthy" - }, - "properties": [ - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 1 - } - ] - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Unknown" - }, - "properties": [ - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "blue", - "value": 1 - } - ] - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Exited" - }, - "properties": [ - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 1 - } - ] - } - } - ] - } - ] - }, - "gridPos": { - "h": 12, - "w": 14, - "x": 10, - "y": 0 - }, - "options": { - "orientation": "vertical", - "showUnfilled": true - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"healthy\"}) or vector(0)", - "instant": true, - "legendFormat": "Healthy", - "range": false - }, - { - 
"datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"unhealthy\"}) or vector(0)", - "instant": true, - "legendFormat": "Unhealthy", - "range": false - }, - { - "datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"unknown\"}) or vector(0)", - "instant": true, - "legendFormat": "Unknown", - "range": false - }, - { - "datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"exited\"}) or vector(0)", - "instant": true, - "legendFormat": "Exited", - "range": false - } - ], - "title": "Components by health", - "type": "bargauge" - }, - { - "datasource": "${datasource}", - "description": "The frequency at which components get updated.\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "points", - "pointSize": 3 - }, - "unit": "ops" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 12 - }, - "options": { - "tooltip": { - "mode": "multi" - } - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (instance) (rate(agent_component_evaluation_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", - "instant": false, - "legendFormat": "__auto", - "range": true - } - ], - "title": "Component evaluation rate", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "The percentiles for how long it takes to complete component evaluations.\n\nComponent evaluations must complete for components to have the latest\narguments. 
The longer the evaluations take, the slower it will be to\nreconcile the state of components.\n\nIf evaluation is taking too long, consider sharding your components to\ndeal with smaller amounts of data and reuse data as much as possible.\n", - "fieldConfig": { - "defaults": { - "unit": "s" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "histogram_quantile(0.99, sum(rate(agent_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\nor\nhistogram_quantile(0.99, sum by (le) (rate(agent_component_evaluation_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\n", - "instant": false, - "legendFormat": "99th percentile", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "histogram_quantile(0.50, sum(rate(agent_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\nor\nhistogram_quantile(0.50, sum by (le) (rate(agent_component_evaluation_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\n", - "instant": false, - "legendFormat": "50th percentile", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "(\n histogram_sum(sum(rate(agent_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))) /\n histogram_count(sum(rate(agent_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\n)\nor\n(\n sum(rate(agent_component_evaluation_seconds_sum{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])) /\n sum(rate(agent_component_evaluation_seconds_count{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n)\n", - "instant": false, - "legendFormat": "Average", - "range": true - } - ], - "title": "Component evaluation time", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "The 
percentage of time spent evaluating 'slow' components - components that took longer than 1 minute to evaluate.\n\nIdeally, no component should take more than 1 minute to evaluate. The components displayed in this chart\nmay be a sign of a problem with the pipeline.\n", - "fieldConfig": { - "defaults": { - "unit": "percentunit" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (component_id) (rate(agent_component_evaluation_slow_seconds{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n/ scalar(sum(rate(agent_component_evaluation_seconds_sum{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))\n", - "instant": false, - "legendFormat": "{{component_id}}", - "range": true - } - ], - "title": "Slow components evaluation times", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Detailed histogram view of how long component evaluations take.\n\nThe goal is to design your config so that evaluations take as little\ntime as possible; under 100ms is a good goal.\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 22 - }, - "maxDataPoints": 30, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "scheme": "Spectral" - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 0.10000000000000001 - }, - "tooltip": { - "show": true, - "yHistogram": true - }, - "yAxis": { - "unit": "s" - } - }, - "pluginVersion": "9.0.6", - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(increase(agent_component_evaluation_seconds{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\nor ignoring (le)\nsum by (le) (increase(agent_component_evaluation_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n", - "format": "heatmap", - "instant": false, - "legendFormat": "{{le}}", - "range": true - } - ], - "title": "Component evaluation 
histogram", - "type": "heatmap" - }, - { - "datasource": "${datasource}", - "description": "Detailed histogram of how long components wait to be evaluated after their dependency is updated.\n\nThe goal is to design your config so that most of the time components do not\nqueue for long; under 10ms is a good goal.\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 22 - }, - "maxDataPoints": 30, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "scheme": "Spectral" - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 0.10000000000000001 - }, - "tooltip": { - "show": true, - "yHistogram": true - }, - "yAxis": { - "unit": "s" - } - }, - "pluginVersion": "9.0.6", - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(increase(agent_component_dependencies_wait_seconds{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\nor ignoring (le)\nsum by (le) (increase(agent_component_dependencies_wait_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n", - "format": "heatmap", - "instant": false, - "legendFormat": "{{le}}", - "range": true - } - ], - "title": "Component dependency wait histogram", - "type": "heatmap" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": "cluster", - "query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - 
"query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": "namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - "timezone": "utc", - "title": "Grafana Agent Flow / Controller", - "uid": "f861e5fef2e795edd5c4c73bee1ba769" - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin - labels: - grafana_dashboard: "1" - name: agent-flow-controller.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - agent-flow-opentelemetry.json: |- - { - "graphTooltip": 1, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "title": "Receivers for traces [otelcol.receiver]", - "type": "row" - }, - { - "datasource": "${datasource}", - "description": "Number of spans successfully pushed into the pipeline.\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(receiver_accepted_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{ pod }} / {{ transport }}", - "range": true - } - ], - "title": "Accepted spans", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Number of spans that could not be pushed into the pipeline.\n", - "fieldConfig": { - "defaults": { - 
"custom": { - "fillOpacity": 20, - "gradientMode": "hue", - "stacking": { - "mode": "normal" - } - } - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(receiver_refused_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{ pod }} / {{ transport }}", - "range": true - } - ], - "title": "Refused spans", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "The duration of inbound RPCs.\n", - "fieldConfig": { - "defaults": { - "unit": "milliseconds" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 0 - }, - "maxDataPoints": 30, - "options": { - "calculate": false, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "scale": "exponential", - "scheme": "Oranges", - "steps": 65 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1.0000000000000001e-09 - }, - "tooltip": { - "show": true, - "yHistogram": true - }, - "yAxis": { - "unit": "s" - } - }, - "pluginVersion": "9.0.6", - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (le) (increase(rpc_server_duration_milliseconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", rpc_service=\"opentelemetry.proto.collector.trace.v1.TraceService\"}[$__rate_interval]))", - "format": "heatmap", - "instant": false, - "legendFormat": "{{le}}", - "range": true - } - ], - "title": "RPC server duration (traces)", - "type": "heatmap" - }, - { - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 10 - }, - "title": "Batching [otelcol.processor.batch]", - "type": "row" - }, - { - "datasource": "${datasource}", - "description": "Number of units in the batch\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 10 - }, - "maxDataPoints": 30, - "options": { - "calculate": 
false, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "scale": "exponential", - "scheme": "Oranges", - "steps": 65 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1.0000000000000001e-09 - }, - "tooltip": { - "show": true, - "yHistogram": true - }, - "yAxis": { - "unit": "s" - } - }, - "pluginVersion": "9.0.6", - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (le) (increase(processor_batch_batch_send_size_ratio_bucket{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval]))", - "format": "heatmap", - "instant": false, - "legendFormat": "{{le}}", - "range": true - } - ], - "title": "Number of units in the batch", - "type": "heatmap" - }, - { - "datasource": "${datasource}", - "description": "Number of distinct metadata value combinations being processed\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 10 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "processor_batch_metadata_cardinality_ratio{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}\n", - "instant": false, - "legendFormat": "{{ pod }}", - "range": true - } - ], - "title": "Distinct metadata values", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Number of times the batch was sent due to a timeout trigger\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 10 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(processor_batch_timeout_trigger_send_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{ pod }}", - "range": true - } - ], - "title": "Timeout trigger", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 20 - }, - "title": "Exporters for traces [otelcol.exporter]", - "type": "row" - }, - { - 
"datasource": "${datasource}", - "description": "Number of spans successfully sent to destination.\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 20 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(exporter_sent_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{ pod }}", - "range": true - } - ], - "title": "Exported sent spans", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Number of spans in failed attempts to send to destination.\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 20 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(exporter_send_failed_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{ pod }}", - "range": true - } - ], - "title": "Exported failed spans", - "type": "timeseries" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": "cluster", - "query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": "namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "allValue": ".*", - 
"datasource": "${datasource}", - "includeAll": true, - "label": "instance", - "multi": true, - "name": "instance", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n", - "refId": "instance" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - "timezone": "utc", - "title": "Grafana Agent Flow / OpenTelemetry", - "uid": "c90e752eb8c0fce588f906b7279aceea" - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin - labels: - grafana_dashboard: "1" - name: agent-flow-opentelemetry.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - agent-flow-prometheus-remote-write.json: |- - { - "annotations": { - "list": [ - { - "datasource": "$loki_datasource", - "enable": true, - "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", - "iconColor": "rgba(0, 211, 255, 1)", - "instant": false, - "name": "Deployments", - "titleFormat": "{{cluster}}/{{namespace}}" - } - ] - }, - "graphTooltip": 1, - "links": [ - { - "icon": "doc", - "targetBlank": true, - "title": "Documentation", - "tooltip": "Component documentation", - "type": "link", - "url": "https://grafana.com/docs/agent/latest/flow/reference/components/prometheus.remote_write/" - }, - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "collapsed": false, - "datasource": "${datasource}", - 
"gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "title": "prometheus.scrape", - "type": "row" - }, - { - "datasource": "${datasource}", - "description": "Percentage of targets successfully scraped by prometheus.scrape\ncomponents.\n\nThis metric is calculated by dividing the number of targets\nsuccessfully scraped by the total number of targets scraped,\nacross all the namespaces in the selected cluster.\n\nLow success rates can indicate a problem with scrape targets,\nstale service discovery, or agent misconfiguration.\n", - "fieldConfig": { - "defaults": { - "unit": "percentunit" - } - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 1 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(up{cluster=\"$cluster\"})\n/\ncount (up{cluster=\"$cluster\"})\n", - "instant": false, - "legendFormat": "% of targets successfully scraped", - "range": true - } - ], - "title": "Scrape success rate in $cluster", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Duration of successful scrapes by prometheus.scrape components,\nacross all the namespaces in the selected cluster.\n\nThis metric should be below your configured scrape interval.\nHigh durations can indicate a problem with a scrape target or\na performance issue with the agent.\n", - "fieldConfig": { - "defaults": { - "unit": "s" - } - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 1 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "quantile(0.99, scrape_duration_seconds{cluster=\"$cluster\"})\n", - "instant": false, - "legendFormat": "p99", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "quantile(0.95, scrape_duration_seconds{cluster=\"$cluster\"})\n", - "instant": false, - "legendFormat": "p95", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "quantile(0.50, scrape_duration_seconds{cluster=\"$cluster\"})\n", - "instant": false, - "legendFormat": "p50", - "range": true - } - ], - 
"title": "Scrape duration in $cluster", - "type": "timeseries" - }, - { - "collapsed": false, - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 11 - }, - "title": "prometheus.remote_write", - "type": "row" - }, - { - "datasource": "${datasource}", - "description": "How far behind prometheus.remote_write from samples recently written\nto the WAL.\n\nEach endpoint prometheus.remote_write is configured to send metrics\nhas its own delay. The time shown here is the sum across all\nendpoints for the given component.\n\nIt is normal for the WAL delay to be within 1-3 scrape intervals. If\nthe WAL delay continues to increase beyond that amount, try\nincreasing the number of maximum shards.\n", - "fieldConfig": { - "defaults": { - "unit": "s" - } - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 0, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (instance, component_id) (\n prometheus_remote_storage_highest_timestamp_in_seconds{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\"}\n - ignoring(url, remote_name) group_right(instance)\n prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "WAL delay", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate of data containing samples and metadata sent by\nprometheus.remote_write.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 20, - "gradientMode": "hue", - "stacking": { - "mode": "normal" - } - }, - "unit": "Bps" - } - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 6, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum without (remote_name, url) (\n 
rate(prometheus_remote_storage_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval]) +\n rate(prometheus_remote_storage_metadata_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "Data write throughput", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Latency of writes to the remote system made by\nprometheus.remote_write.\n", - "fieldConfig": { - "defaults": { - "unit": "s" - } - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 12, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "histogram_quantile(0.99, sum by (le) (\n rate(prometheus_remote_storage_sent_batch_duration_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n))\n", - "instant": false, - "legendFormat": "99th percentile", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "histogram_quantile(0.50, sum by (le) (\n rate(prometheus_remote_storage_sent_batch_duration_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n))\n", - "instant": false, - "legendFormat": "50th percentile", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "sum(rate(prometheus_remote_storage_sent_batch_duration_seconds_sum{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\", component_id=~\"$component\"}[$__rate_interval])) /\nsum(rate(prometheus_remote_storage_sent_batch_duration_seconds_count{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\", component_id=~\"$component\"}[$__rate_interval]))\n", - 
"instant": false, - "legendFormat": "Average", - "range": true - } - ], - "title": "Write latency", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Total number of shards which are concurrently sending samples read\nfrom the Write-Ahead Log.\n\nShards are bound to a minimum and maximum, displayed on the graph.\nThe lowest minimum and the highest maximum across all clients is\nshown.\n\nEach client has its own set of shards, minimum shards, and maximum\nshards; filter to a specific URL to display more granular\ninformation.\n", - "fieldConfig": { - "defaults": { - "unit": "none" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Minimum" - }, - "properties": [ - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 15 - ], - "fill": "dash" - } - }, - { - "id": "custom.showPoints", - "value": "never" - }, - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": false, - "viz": false - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Maximum" - }, - "properties": [ - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 15 - ], - "fill": "dash" - } - }, - { - "id": "custom.showPoints", - "value": "never" - }, - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": false, - "viz": false - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 18, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum without (remote_name, url) (\n prometheus_remote_storage_shards{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "min (\n prometheus_remote_storage_shards_min{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}\n)\n", - "instant": 
false, - "legendFormat": "Minimum", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "max (\n prometheus_remote_storage_shards_max{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}\n)\n", - "instant": false, - "legendFormat": "Maximum", - "range": true - } - ], - "title": "Shards", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Total outgoing samples sent by prometheus.remote_write.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 20, - "gradientMode": "hue", - "stacking": { - "mode": "normal" - } - }, - "unit": "cps" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 22 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum without (url, remote_name) (\n rate(prometheus_remote_storage_samples_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "Sent samples / second", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate of samples which prometheus.remote_write could not send due to\nnon-recoverable errors.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 20, - "gradientMode": "hue", - "stacking": { - "mode": "normal" - } - }, - "unit": "cps" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 22 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum without (url,remote_name) (\n rate(prometheus_remote_storage_samples_failed_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "Failed samples / second", - 
"type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate of samples which prometheus.remote_write attempted to resend\nafter receiving a recoverable error.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 20, - "gradientMode": "hue", - "stacking": { - "mode": "normal" - } - }, - "unit": "cps" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 22 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum without (url,remote_name) (\n rate(prometheus_remote_storage_samples_retried_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "Retried samples / second", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Total number of active series across all components.\n\nAn \"active series\" is a series that prometheus.remote_write recently\nreceived a sample for. 
Active series are garbage collected whenever a\ntruncation of the WAL occurs.\n", - "fieldConfig": { - "defaults": { - "unit": "short" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 32 - }, - "options": { - "legend": { - "showLegend": false - } - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(agent_wal_storage_active_series{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"})\n", - "instant": false, - "legendFormat": "Series", - "range": true - } - ], - "title": "Active series (total)", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Total number of active series which are currently being tracked by\nprometheus.remote_write components, with separate lines for each agent instance.\n\nAn \"active series\" is a series that prometheus.remote_write recently\nreceived a sample for. Active series are garbage collected whenever a\ntruncation of the WAL occurs.\n", - "fieldConfig": { - "defaults": { - "unit": "short" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 32 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "agent_wal_storage_active_series{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id!=\"\", component_id=~\"$component\", url=~\"$url\"}\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "Active series (by instance/component)", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Total number of active series which are currently being tracked by\nprometheus.remote_write components, aggregated across all instances.\n\nAn \"active series\" is a series that prometheus.remote_write recently\nreceived a sample for. 
Active series are garbage collected whenever a\ntruncation of the WAL occurs.\n", - "fieldConfig": { - "defaults": { - "unit": "short" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 32 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (component_id) (agent_wal_storage_active_series{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id!=\"\", component_id=~\"$component\", url=~\"$url\"})\n", - "instant": false, - "legendFormat": "{{component_id}}", - "range": true - } - ], - "title": "Active series (by component)", - "type": "timeseries" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": "cluster", - "query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": "namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "allValue": ".*", - "datasource": "${datasource}", - "includeAll": true, - "label": "instance", - "multi": true, - "name": "instance", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n", - "refId": "instance" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "allValue": ".*", - "datasource": "${datasource}", - 
"includeAll": true, - "label": "component", - "multi": true, - "name": "component", - "query": { - "query": "label_values(agent_wal_samples_appended_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"prometheus\\\\.remote_write\\\\..*\"}, component_id)\n", - "refId": "component" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "allValue": ".*", - "datasource": "${datasource}", - "includeAll": true, - "label": "url", - "multi": true, - "name": "url", - "query": { - "query": "label_values(prometheus_remote_storage_sent_batch_duration_seconds_sum{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\"}, url)\n", - "refId": "url" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - "timezone": "utc", - "title": "Grafana Agent Flow / Prometheus Components", - "uid": "ee34ffa2d084547d650e1d96a26306aa" - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin - labels: - grafana_dashboard: "1" - name: agent-flow-prometheus-remote-write.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - agent-flow-resources.json: |- - { - "annotations": { - "list": [ - { - "datasource": "$loki_datasource", - "enable": true, - "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", - "iconColor": "rgba(0, 211, 255, 1)", - "instant": false, - "name": "Deployments", - "titleFormat": "{{cluster}}/{{namespace}}" - } - ] - }, - "graphTooltip": 1, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - 
"keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "${datasource}", - "description": "CPU usage of the Grafana Agent process relative to 1 CPU core.\n\nFor example, 100% means using one entire CPU core.\n", - "fieldConfig": { - "defaults": { - "unit": "percentunit" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(agent_resources_process_cpu_seconds_total{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[$__rate_interval])", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "CPU usage", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Resident memory size of the Grafana Agent process.\n", - "fieldConfig": { - "defaults": { - "unit": "decbytes" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "agent_resources_process_resident_memory_bytes{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "Memory (RSS)", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate at which the Grafana Agent process performs garbage collections.\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "points", - "pointSize": 3 - }, - "unit": "ops" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 8 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(go_gc_duration_seconds_count{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[5m])\nand on(instance)\nagent_build_info{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\n", - "instant": false, - "legendFormat": "{{instance}}", - 
"range": true - } - ], - "title": "Garbage collections", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Number of goroutines which are running in parallel. An infinitely\ngrowing number of these indicates a goroutine leak.\n", - "fieldConfig": { - "defaults": { - "unit": "none" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 8 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "go_goroutines{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\nand on(instance)\nagent_build_info{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\n", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "Goroutines", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Heap memory currently in use by the Grafana Agent process.\n", - "fieldConfig": { - "defaults": { - "unit": "decbytes" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 8 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\nand on(instance)\nagent_build_info{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\n", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "Memory (heap inuse)", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate of data received across all network interfaces for the machine\nGrafana Agent is running on.\n\nData shown here is across all running processes and not exclusive to\nthe running Grafana Agent process.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 30, - "gradientMode": "none", - "stacking": { - "mode": "normal" - } - }, - "unit": "Bps" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 16 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": 
"rate(agent_resources_machine_rx_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "Network receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate of data sent across all network interfaces for the machine\nGrafana Agent is running on.\n\nData shown here is across all running processes and not exclusive to\nthe running Grafana Agent process.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 30, - "gradientMode": "none", - "stacking": { - "mode": "normal" - } - }, - "unit": "Bps" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 16 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(agent_resources_machine_tx_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "Network send bandwidth", - "type": "timeseries" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": "cluster", - "query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": 
"namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "allValue": ".*", - "datasource": "${datasource}", - "includeAll": true, - "label": "instance", - "multi": true, - "name": "instance", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n", - "refId": "instance" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - "timezone": "utc", - "title": "Grafana Agent Flow / Resources", - "uid": "d47aae5c53be5550f8e3bc8a904ba61a" - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin - labels: - grafana_dashboard: "1" - name: agent-flow-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - go-runtime.json: |- - { - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "description": "Go runtime metrics", - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "id": 14, - "iteration": 1623758038990, - "links": [ ], - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average total bytes of memory reserved across all process instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 0 - }, - "hiddenSeries": false, - "id": 16, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": 
false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by(job)(go_memstats_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}} (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Total Reserved Memory", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average stack memory usage across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 0 - }, - "hiddenSeries": false, - "id": 24, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - 
"spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job) (go_memstats_stack_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: stack inuse (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Stack Memory Use", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average memory reservations by the runtime, not for stack or heap, across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 8 - }, - "hiddenSeries": false, - "id": 26, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(go_memstats_mspan_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{instance}}: mspan (avg)", - "refId": "B" - }, - { - "expr": "avg 
by (job)(go_memstats_mcache_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{instance}}: mcache (avg)", - "refId": "D" - }, - { - "expr": "avg by (job)(go_memstats_buck_hash_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{instance}}: buck hash (avg)", - "refId": "E" - }, - { - "expr": "avg by (job)(go_memstats_gc_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: gc (avg)", - "refId": "F" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Other Memory Reservations", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average memory reserved, and actually in use, by the heap, across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 8 - }, - "hiddenSeries": false, - "id": 12, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - 
"spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(go_memstats_heap_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: heap reserved (avg)", - "refId": "B" - }, - { - "expr": "avg by (job)(go_memstats_heap_inuse_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: heap in use (avg)", - "refId": "A" - }, - { - "expr": "avg by (job)(go_memstats_heap_alloc_bytes{job=~\"tns_app\",instance=~\".*\"})", - "interval": "", - "legendFormat": "{{job}}: heap alloc (avg)", - "refId": "C" - }, - { - "expr": "avg by (job)(go_memstats_heap_idle_bytes{job=~\"tns_app\",instance=~\".*\"})", - "interval": "", - "legendFormat": "{{job}}: heap idle (avg)", - "refId": "D" - }, - { - "expr": "avg by (job)(go_memstats_heap_released_bytes{job=~\"tns_app\",instance=~\".*\"})", - "interval": "", - "legendFormat": "{{job}}: heap released (avg)", - "refId": "E" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Heap Memory", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average allocation rate in bytes per second, across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 16 - }, - 
"hiddenSeries": false, - "id": 14, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 1, - "points": true, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(rate(go_memstats_alloc_bytes_total{job=\"$job\", instance=~\"$instance\"}[$__rate_interval]))", - "interval": "", - "legendFormat": "{{job}}: bytes malloced/s (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Allocation Rate, Bytes", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average rate of heap object allocation, across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 16 - }, - "hiddenSeries": false, - "id": 20, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - 
"alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(go_memstats_mallocs_total{job=\"$job\", instance=~\"$instance\"}[$__rate_interval])", - "interval": "", - "legendFormat": "{{job}}: obj mallocs/s (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Heap Object Allocation Rate", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average number of live memory objects across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 24 - }, - "hiddenSeries": false, - "id": 22, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": 
[ - { - "expr": "avg by(job)(go_memstats_mallocs_total{job=\"$job\", instance=~\"$instance\"} - go_memstats_frees_total{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: object count (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Number of Live Objects", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average number of goroutines across instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 24 - }, - "hiddenSeries": false, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(go_goroutines{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: goroutine count (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - 
"timeShift": null, - "title": "Goroutines", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 32 - }, - "hiddenSeries": false, - "id": 4, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(go_gc_duration_seconds{quantile=\"0\", job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: min gc time (avg)", - "refId": "A" - }, - { - "expr": "avg by (job)(go_gc_duration_seconds{quantile=\"1\", job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: max gc time (avg)", - "refId": "B" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "GC min & max duration", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - 
"xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "The number used bytes at which the runtime plans to perform the next GC, averaged across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 32 - }, - "hiddenSeries": false, - "id": 27, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(go_memstats_next_gc_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}} next gc bytes (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Next GC, Bytes", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "s", - "label": null, - 
"logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "refresh": "30s", - "schemaVersion": 30, - "style": "dark", - "tags": [ - "go-runtime" - ], - "templating": { - "list": [ - { - "current": { - "selected": false, - "text": "MONITORING", - "value": "MONITORING" - }, - "description": null, - "error": null, - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "datasource", - "options": [ ], - "query": "prometheus", - "queryValue": "", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" - }, - { - "allValue": null, - "current": { - "selected": false, - "text": "pilot", - "value": "pilot" - }, - "datasource": "$datasource", - "definition": "label_values(go_info, job)", - "description": null, - "error": null, - "hide": 0, - "includeAll": false, - "label": "job", - "multi": false, - "name": "job", - "options": [ ], - "query": { - "query": "label_values(go_info, job)", - "refId": "MONITORING-job-Variable-Query" - }, - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": "", - "current": { - "selected": false, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "definition": "label_values(go_info{job=\"$job\"}, instance)", - "description": null, - "error": null, - "hide": 0, - "includeAll": true, - "label": "instance", - "multi": true, - "name": "instance", - "options": [ ], - "query": { - "query": "label_values(go_info{job=\"$job\"}, instance)", - "refId": "MONITORING-instance-Variable-Query" - }, - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-30m", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", 
- "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Go runtime metrics", - "uid": "T4sSTLBGzgp", - "version": 1 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Go - Runtime - labels: - grafana_dashboard: "1" - name: go-runtime.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - datasources.yaml: | - apiVersion: 1 - - deleteDatasources: - - name: Metrics - uid: metrics - - name: Traces - uid: traces - - datasources: - # Mimir for metrics - - name: Metrics - type: prometheus - uid: metrics - access: proxy - orgId: 1 - url: http://nginx.gateway.svc.cluster.local:8080/prometheus - basicAuth: false - isDefault: false - version: 1 - editable: true - jsonData: - prometheusType: Mimir - exemplarTraceIdDestinations: - - name: traceID - datasourceUid: traces - - # Tempo for traces - - name: Traces - type: tempo - access: proxy - uid: traces - url: http://nginx.gateway.svc.cluster.local:3200 - basicAuth: false - isDefault: true - version: 1 - editable: true - jsonData: - search: - hide: false - nodeGraph: - enabled: true - serviceMap: - datasourceUid: metrics - traceQuery: - timeShiftEnabled: true - spanStartTimeShift: '-30m' - spanEndTimeShift: '30m' - spanBar: - type: 'Tag' - tag: 'http.path' - tracesToMetrics: - datasourceUid: metrics - spanStartTimeShift: '-30m' - spanEndTimeShift: '30m' - tags: [{ key: 'service.name', value: 'service' }, { key: 'span_name' }, { key: 'http_method' }] - queries: - - name: '(R) Rate' - query: 'sum(rate(traces_spanmetrics_calls_total{$$__tags}[$$__rate_interval]))' - - name: '(E) Error Rate' - query: 'sum(rate(traces_spanmetrics_calls_total{$$__tags, status_code="STATUS_CODE_ERROR"}[$$__rate_interval]))' - - name: '(D) Duration' - query: 'histogram_quantile(0.9, sum(rate(traces_spanmetrics_latency_bucket{$$__tags}[$$__rate_interval])) by (le))' -kind: ConfigMap -metadata: - 
labels: - grafana_datasource: "1" - name: grafana-datasources-k2hbd65tcb - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-alertmanager-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"alertmanager\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) 
(container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alertmanager", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": 
{ - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?alertmanager.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?alertmanager.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": 
"normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"alertmanager\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"alertmanager\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk reads", 
- "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Disk", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(alertmanager).*\"\n }\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - 
"includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Alertmanager resources", - "uid": "a6883fb22799ac74479c7db872451092", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-alertmanager-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-alertmanager.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "100px", - "panels": [ - { - 
"aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cluster_job_pod:cortex_alertmanager_alerts:sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Total alerts", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": 
"sum(cluster_job_pod:cortex_alertmanager_silences:sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Total silences", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max(cortex_alertmanager_tenants_discovered{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Tenants", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": 
"short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { 
- "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "QPS", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) 
(cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alertmanager Distributor", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - 
}, - "span": 12, - "targets": [ - { - "expr": "sum(cluster_job:cortex_alertmanager_alerts_received_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n-\nsum(cluster_job:cortex_alertmanager_alerts_invalid_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(cluster_job:cortex_alertmanager_alerts_invalid_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "APS", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alerts received", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "cortex_alertmanager_dispatcher_aggregation_groups{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "per pod Active Aggregation Groups", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alerts grouping", - "titleSize": "h6" - }, 
- { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(cluster_job_integration:cortex_alertmanager_notifications_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n-\nsum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "NPS", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - 
"mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "(\nsum(cluster_job_integration:cortex_alertmanager_notifications_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}) by(integration)\n-\nsum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}) by(integration)\n) > 0\nor on () vector(0)\n", - "format": "time_series", - "legendFormat": "success - {{ integration }}", - "legendLink": null - }, - { - "expr": "sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}) by(integration)", - "format": "time_series", - "legendFormat": "failed - {{ integration }}", - "legendLink": null - } - ], - "title": "NPS by integration", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_alertmanager_notification_latency_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_alertmanager_notification_latency_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_alertmanager_notification_latency_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_alertmanager_notification_latency_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alert notifications", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 
3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Error rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, 
sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alertmanager Configuration Object Store (Alertmanager accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, 
- "unit": "ms" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Get", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", 
- "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: GetRange", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - 
"min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Upload", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - 
"group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ 
- { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (cortex_alertmanager_tenants_owned{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Per pod tenants", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (cluster_job_pod:cortex_alertmanager_alerts:sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Per pod alerts", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": 
"normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (cluster_job_pod:cortex_alertmanager_silences:sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Per pod silences", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Replication", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_alertmanager_sync_configs_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_alertmanager_sync_configs_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_alertmanager_sync_configs_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Syncs/sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(reason) (rate(cortex_alertmanager_sync_configs_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{reason}}", - "legendLink": null - } - ], - "title": "Syncs/sec (by reason)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 24, - "links": [ ], - "options": 
{ - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum (rate(cortex_alertmanager_ring_check_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "errors", - "legendLink": null - } - ], - "title": "Ring check errors/sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Tenant configuration sync", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(outcome) (rate(cortex_alertmanager_state_initial_sync_completed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "interval": "1m", - "legendFormat": "{{outcome}}", - "legendLink": null - } - ], - "title": "Initial syncs /sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - 
}, - "id": 26, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "interval": "1m", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "interval": "1m", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "interval": "1m", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Initial sync duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": 
"absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 27, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_alertmanager_state_fetch_replica_state_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_alertmanager_state_fetch_replica_state_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "interval": "1m", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_alertmanager_state_fetch_replica_state_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "interval": "1m", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Fetch state from other alertmanagers /sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Sharding initial state sync", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - 
"mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 28, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(cluster_job:cortex_alertmanager_state_replication_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n-\nsum(cluster_job:cortex_alertmanager_state_replication_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(cluster_job:cortex_alertmanager_state_replication_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Replicate state to other alertmanagers /sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": 
"byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 29, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(cluster_job:cortex_alertmanager_partial_state_merges_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n-\nsum(cluster_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(cluster_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Merge state from other alertmanagers /sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 30, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - 
"sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_alertmanager_state_persist_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_alertmanager_state_persist_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_alertmanager_state_persist_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Persist state to remote storage /sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Sharding runtime state sync", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": 
"namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Alertmanager", - "uid": "b0d38d318bbddd80476246d4930f9e55", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-alertmanager.json + name: agent-config-h9mgdthkmd namespace: monitoring-system --- apiVersion: v1 data: - mimir-compactor-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { 
- "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", 
- "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "CPU and memory", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"compactor\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (RSS)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} > 0)", - "format": "time_series", - "legendFormat": 
"limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?compactor.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) 
(rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?compactor.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"compactor\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - 
}, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"compactor\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk reads", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(compactor).*\"\n }\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - 
"repeatRowId": null, - "showTitle": true, - "title": "Disk", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Compactor resources", - "uid": "09a5c49e9cdb2f2b24c6d184574a07fd", - "version": 0 - } + MEMCACHED_SECRET_NAME: integrations-memcached + MYSQL_SECRET_NAME: integrations-mysql + REDIS_SECRET_NAME: integrations-redis + memcached.river: "/*\nModule: Memcached integrations\nDescription: Wrapper module + to integration Memcached 
metrics\n*/\nargument \"clustering\" {\n\t// comment + = \"Whether or not clustering should be enabled\"\n\toptional = true\n\tdefault + \ = true\n}\n\nargument \"name\" {\n\t// comment = \"Name of the secret for Memcached\"\n\toptional + = true\n\tdefault = \"integrations-memcached\"\n}\n\nargument \"namespace\" {\n\t// + comment = \"Namespace of the Memcached secret Integrations\"\n\toptional = true\n\tdefault + \ = \"default\"\n}\n\nargument \"instance\" {\n\t// comment = \"Instance of the + Memcached\"\n\toptional = true\n\tdefault = \"primary\"\n}\n\nargument \"forward_to\" + { }\n\nremote.kubernetes.secret \"memcached\" {\n\tname = argument.name.value\n\tnamespace + = argument.namespace.value\n}\n\n// Metrics\nprometheus.exporter.memcached \"integrations_memcached\" + {\n\taddress = nonsensitive(remote.kubernetes.secret.memcached.data[\"memcached-address\"])\n\ttimeout + = \"5s\"\n}\n\nprometheus.scrape \"memcached\" {\n\tclustering {\n\t\tenabled + = argument.clustering.value\n\t}\n\n\tenable_protobuf_negotiation = true\n\tscrape_classic_histograms + \ = true\n\n\ttargets = concat(\n\t\tprometheus.exporter.memcached.integrations_memcached.targets,\n\t)\n\tjob_name + \ = \"integrations/memcached\"\n\tforward_to = [prometheus.relabel.integrations_memcached.receiver]\n}\n\nprometheus.relabel + \"integrations_memcached\" {\n\trule {\n\t\treplacement = argument.instance.value\n\t\ttarget_label + = \"instance\"\n\t}\n\tforward_to = argument.forward_to.value\n}\n" + mysql.river: "/*\nModule: Mysql integrations\nDescription: Wrapper module to integration + mysql metrics\n*/\nargument \"clustering\" {\n\t// comment = \"Whether or not + clustering should be enabled\"\n\toptional = true\n\tdefault = true\n}\n\nargument + \"name\" {\n\t// comment = \"Name of the secret for MySQL\"\n\toptional = true\n\tdefault + \ = \"integrations-mysql\"\n}\n\nargument \"namespace\" {\n\t// comment = \"Namespace + of the MySQL secret Integrations\"\n\toptional = true\n\tdefault = 
\"default\"\n}\n\nargument + \"instance\" {\n\t// comment = \"Instance of the Database\"\n\toptional = true\n\tdefault + \ = \"primary\"\n}\n\nargument \"forward_to\" { }\n\nremote.kubernetes.secret + \"mysql\" {\n\tname = argument.name.value\n\tnamespace = argument.namespace.value\n}\n\n// + Metrics\nprometheus.exporter.mysql \"integrations_mysql\" {\n\tdata_source_name + = nonsensitive(remote.kubernetes.secret.mysql.data[\"mysql-username\"]) + \":\" + + nonsensitive(remote.kubernetes.secret.mysql.data[\"mysql-password\"]) + \"@(\" + + nonsensitive(remote.kubernetes.secret.mysql.data[\"mysql-host\"]) + \")/\"\n}\n\nprometheus.scrape + \"mysql\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\tenable_protobuf_negotiation + = true\n\tscrape_classic_histograms = true\n\n\ttargets = concat(\n\t\tprometheus.exporter.mysql.integrations_mysql.targets,\n\t)\n\tjob_name + \ = \"integrations/mysql\"\n\tforward_to = [prometheus.relabel.integrations_mysql.receiver]\n}\n\nprometheus.relabel + \"integrations_mysql\" {\n\trule {\n\t\treplacement = argument.instance.value\n\t\ttarget_label + = \"instance\"\n\t}\n\tforward_to = argument.forward_to.value\n}\n" + redis.river: "/*\nModule: Redis integrations\nDescription: Wrapper module to integration + Redis metrics\n*/\nargument \"clustering\" {\n\t// comment = \"Whether or not + clustering should be enabled\"\n\toptional = true\n\tdefault = true\n}\n\nargument + \"namespace\" {\n\t// comment = \"Namespace of the Redis secret Integrations\"\n\toptional + = true\n\tdefault = \"default\"\n}\n\nargument \"name\" {\n\t// comment = \"Name + of the secret for Redis\"\n\toptional = true\n\tdefault = \"integrations-redis\"\n}\n\nargument + \"instance\" {\n\t// comment = \"Instance of the Redis\"\n\toptional = true\n\tdefault + \ = \"master\"\n}\n\nargument \"forward_to\" { }\n\nremote.kubernetes.secret \"redis\" + {\n\tname = argument.name.value\n\tnamespace = argument.namespace.value\n}\n\n// + 
Metrics\nprometheus.exporter.redis \"integrations_redis\" {\n\tredis_addr = + nonsensitive(remote.kubernetes.secret.redis.data[\"redis-addr\"])\n\tredis_password + = nonsensitive(remote.kubernetes.secret.redis.data[\"redis-password\"])\n}\n\nprometheus.scrape + \"redis\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\tenable_protobuf_negotiation + = true\n\tscrape_classic_histograms = true\n\n\ttargets = concat(\n\t\tprometheus.exporter.redis.integrations_redis.targets,\n\t)\n\tjob_name + \ = \"integrations/redis\"\n\tforward_to = [prometheus.relabel.integrations_redis.receiver]\n}\n\nprometheus.relabel + \"integrations_redis\" {\n\trule {\n\t\treplacement = argument.instance.value\n\t\ttarget_label + = \"instance\"\n\t}\n\tforward_to = argument.forward_to.value\n}\n" kind: ConfigMap metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-compactor-resources.json + name: agent-integrations namespace: monitoring-system --- apiVersion: v1 data: - mimir-compactor.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Per-instance runs\nNumber of times a compactor instance triggers a compaction across all tenants that it manages.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "bars", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - 
"group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "completed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "started" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#34CCEB", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_compactor_runs_started_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "started", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_compactor_runs_completed_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "completed", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_compactor_runs_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Per-instance runs / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Tenants compaction progress\nIn a multi-tenant cluster, display the progress of tenants that are compacted while compaction is running.\n\n", - "fieldConfig": { - "defaults": { - "max": 1, - "noValue": 1, - "unit": "percentunit" - } - 
}, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "(\n cortex_compactor_tenants_processing_succeeded{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"} +\n cortex_compactor_tenants_processing_failed{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"} +\n cortex_compactor_tenants_skipped{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}\n)\n/\ncortex_compactor_tenants_discovered{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"} > 0\n", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Tenants compaction progress", - "type": "timeseries" - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Longest time since last successful run\nDisplays the amount of time since the most recent successful execution\nof the compactor.\nThe value shown will be for the compactor replica that has the longest time since its\nlast successful run.\nThe table to the right shows a summary for all compactor replicas.\n\nIf there is no time value, one of the following messages might appear:\n\n- If you see \"No compactor data\" in this panel, that means that no compactors are active yet.\n\n- If you see \"No successful runs\" in this panel, that means that compactors are active, but none\n of them were successfully executed yet.\n\nThese might be expected - for example, if you just recently restarted your compactors,\nthey might not have had a chance to complete their first compaction run.\nHowever, if these messages persist, you should check the health of your compactors.\n\n", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 
1, - "noValue": "No compactor data", - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Last run" - }, - "properties": [ - { - "id": "custom.width", - "value": 74 - }, - { - "id": "mappings", - "value": [ - { - "options": { - "from": "-Infinity", - "result": { - "color": "text", - "text": "No successful runs since startup yet" - }, - "to": 0 - }, - "type": "range" - } - ] - }, - { - "id": "color", - "value": { - "mode": "thresholds" - } - }, - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "yellow", - "value": 7200 - }, - { - "color": "orange", - "value": 21600 - }, - { - "color": "red", - "value": 43200 - } - ] - } - } - ] - } - ] - }, - "fill": 1, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "reduceOptions": { - "calcs": [ - "first" - ], - "fields": "/^Last run$/", - "values": false - }, - "textMode": "value" - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max by(pod)\n(\n (time() * (max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[1h]) !=bool 0))\n -\n max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[1h])\n)\n", - "format": "table", - "instant": true, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": 
null, - "timeShift": null, - "title": "Longest time since last successful run", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transformations": [ - { - "id": "organize", - "options": { - "renameByName": { - "Value": "Last run", - "pod": "Compactor" - } - } - }, - { - "id": "sortBy", - "options": { - "sort": [ - { - "desc": true, - "field": "Last run" - } - ] - } - } - ], - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Last successful run per-compactor replica\nDisplays the compactor replicas, and for each, shows how long it has been since\nits last successful compaction run.\n\nThe value in the status column is based on how long it has been since the last successful compaction.\n\n- Okay: less than 2 hours\n- Delayed: more than 2 hours\n- Late: more than 6 hours\n- Very late: more than 12 hours\n\nIf the status of any compactor replicas are *Late* or *Very late*, check their health.\n\n", - "fieldConfig": { - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Status" - }, - "properties": [ - { - "id": "custom.displayMode", - "value": "color-background" - }, - { - "id": "mappings", - "value": [ - { - "options": { - "from": "-Infinity", - "result": { - "color": "transparent", - "text": "N/A" - }, - "to": 0 - }, - "type": "range" - }, - { - "options": { - "from": 0, - "result": { - "color": "green", - "text": "Ok" - }, - "to": 7200 - }, - "type": "range" - }, - { - "options": { - "from": 7200, - "result": { - "color": "yellow", - "text": "Delayed" - }, - "to": 21600 - }, - "type": "range" - }, - { - "options": { - "from": 21600, - "result": { - "color": 
"orange", - "text": "Late" - }, - "to": 43200 - }, - "type": "range" - }, - { - "options": { - "from": 43200, - "result": { - "color": "red", - "text": "Very late" - }, - "to": "Infinity" - }, - "type": "range" - }, - { - "options": { - "match": "null+nan", - "result": { - "color": "transparent", - "text": "Unknown" - } - }, - "type": "special" - } - ] - }, - { - "id": "custom.width", - "value": 86 - }, - { - "id": "custom.align", - "value": "center" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Last run" - }, - "properties": [ - { - "id": "unit", - "value": "s" - }, - { - "id": "custom.width", - "value": 74 - }, - { - "id": "mappings", - "value": [ - { - "options": { - "from": "-Infinity", - "result": { - "text": "Never" - }, - "to": 0 - }, - "type": "range" - } - ] - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "max by(pod)\n(\n (time() * (max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[1h]) !=bool 0))\n -\n max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[1h])\n)\n", - "format": "table", - "instant": true, - "legendFormat": "Last run", - "legendLink": null - } - ], - "title": "Last successful run per-compactor replica", - "transformations": [ - { - "id": "organize", - "options": { - "renameByName": { - "Value": "Last run", - "pod": "Compactor" - } - } - }, - { - "id": "sortBy", - "options": { - "sort": [ - { - "desc": true, - "field": "Last run" - } - ] - } - }, - { - "id": "calculateField", - "options": { - "alias": "One", - "binary": { - "left": "Last run", - "operator": "/", - "right": "Last run" - }, - "mode": "binary", - "replaceFields": false - } - }, - { - "id": 
"calculateField", - "options": { - "alias": "Status", - "binary": { - "left": "Last run", - "operator": "*", - "right": "One" - }, - "mode": "binary", - "replaceFields": false - } - }, - { - "id": "filterFieldsByName", - "options": { - "include": { - "names": [ - "Compactor", - "Last run", - "Status" - ] - } - } - } - ], - "type": "table" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Summary", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Estimated Compaction Jobs\nEstimated number of compaction jobs based on latest version of bucket index. Ingesters upload new blocks every 2 hours (shortly after 01:00 UTC, 03:00 UTC, 05:00 UTC, etc.),\nand compactors should process all of them within 2h interval. If this graph regularly goes to zero (or close to zero) in 2 hour intervals, then compaction works as designed.\n\nMetric with number of compaction jobs is computed from blocks in bucket index, which is updated regularly. Metric doesn't change between bucket index updates, even if\nthere were compaction jobs finished in this time. When computing compaction jobs, only jobs that can be executed at given moment are counted. There can be more\njobs, but if they are blocked, they are not counted in the metric. For example if there is a split compaction job pending for some time range, no merge job\ncovering the same time range can run. 
In this case only split compaction job is counted toward the metric, but merge job isn't.\n\nIn other words, computed number of compaction jobs is the minimum number of compaction jobs based on latest version of bucket index.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(cortex_bucket_index_estimated_compaction_jobs{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}) and (sum(rate(cortex_bucket_index_estimated_compaction_jobs_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) == 0)", - "format": "time_series", - "legendFormat": "Jobs", - "legendLink": null - } - ], - "title": "Estimated Compaction Jobs", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### TSDB compactions / sec\nRate of TSDB compactions. 
Single TSDB compaction takes one or more input blocks and produces one or more (during \"split\" phase) output blocks.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(prometheus_tsdb_compactions_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "compactions", - "legendLink": null - } - ], - "title": "TSDB compactions / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### TSDB compaction duration\nDisplay the amount of time that it has taken to run a single TSDB compaction.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(prometheus_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": 
"A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(prometheus_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(prometheus_tsdb_compaction_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(prometheus_tsdb_compaction_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "TSDB compaction duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "avg(max by(user) (cortex_bucket_blocks_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}))", - "format": "time_series", - 
"legendFormat": "avg", - "legendLink": null - } - ], - "title": "Average blocks / tenant", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Tenants with largest number of blocks\nThe 10 tenants with the largest number of blocks.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "topk(10, max by(user) (cortex_bucket_blocks_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "Tenants with largest number of blocks", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_compactor_blocks_marked_for_deletion_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "blocks", - "legendLink": null - } - ], - "title": "Blocks marked for deletion / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_compactor_blocks_cleaned_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_compactor_block_cleanup_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Blocks deletions / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Garbage collector", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - 
"datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_compactor_meta_syncs_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_compactor_meta_sync_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_compactor_meta_sync_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Metadata syncs / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": 
[ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_compactor_meta_sync_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_compactor_meta_sync_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_compactor_meta_sync_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_compactor_meta_sync_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Metadata sync duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Metadata sync", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - 
"showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Error rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - 
"legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "nullPointMode": "null as zero", - 
"options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Object Store", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": 
"none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Get", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 
0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: GetRange", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - 
"min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Upload", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, 
- "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 21, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - 
"drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": 
"single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval])) * 1e3 / 
sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Key-value store for compactors ring", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", 
- "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Compactor", - "uid": "1b3443aea86db629e6efdb7d05c53823", - "version": 0 - } + integrations.river: "/*\nModule: Agent integrations\nDescription: Wrapper module + to include auto loading integrations\n*/\nargument \"name\" {\n\t// comment = + \"Name of the integrations config\"\n\toptional = true\n\tdefault = \"agent-integrations\"\n}\n\nargument + \"namespace\" {\n\t// comment = \"Namespace of the integrations config\"\n\toptional + = true\n\tdefault = \"default\"\n}\n\nargument \"forward_to\" { }\n\nremote.kubernetes.configmap + \"integrations\" {\n\tname = argument.name.value\n\tnamespace = argument.namespace.value\n}\n\n/********************************************\n + * Integrations Mysql\n ********************************************/\nmodule.string + \"mysql\" {\n\tcontent = remote.kubernetes.configmap.integrations.data[\"mysql.river\"]\n\n\targuments + {\n\t\tnamespace = argument.namespace.value\n\t\tname = remote.kubernetes.configmap.integrations.data[\"MYSQL_SECRET_NAME\"]\n\t\tinstance + \ = \"primary\"\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n\n/********************************************\n + * Integrations Memcached\n ********************************************/\nmodule.string + \"memcached\" {\n\tcontent = remote.kubernetes.configmap.integrations.data[\"memcached.river\"]\n\n\targuments + {\n\t\tnamespace = argument.namespace.value\n\t\tname = remote.kubernetes.configmap.integrations.data[\"MEMCACHED_SECRET_NAME\"]\n\t\tinstance + \ = \"primary\"\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n\n/********************************************\n + * Integrations Redis\n ********************************************/\nmodule.string + \"redis\" 
{\n\tcontent = remote.kubernetes.configmap.integrations.data[\"redis.river\"]\n\n\targuments + {\n\t\tnamespace = argument.namespace.value\n\t\tname = remote.kubernetes.configmap.integrations.data[\"REDIS_SECRET_NAME\"]\n\t\tinstance + \ = \"master\"\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n" + lgtmp.river: "/********************************************\n * ARGUMENTS\n ********************************************/\nargument + \"cluster\" {\n\toptional = true\n\tdefault = \"k3d-k3s-codelab\"\n}\n\nargument + \"tenant\" {\n\toptional = true\n\tdefault = \"anonymous\"\n}\n\nargument \"metrics_endpoint\" + {\n\toptional = true\n\tdefault = \"http://mimir:8080\"\n\t//comment = \"Where + to send collected metrics.\"\n}\n\nargument \"logs_endpoint\" {\n\toptional = + true\n\tdefault = \"http://loki:3100\"\n\t//comment = \"Where to send collected + logs.\"\n}\n\nargument \"traces_endpoint\" {\n\toptional = true\n\tdefault = + \"tempo:4317\"\n\t//comment = \"Where to send collected traces.\"\n}\n\nargument + \"profiles_endpoint\" {\n\toptional = true\n\tdefault = \"http://pyroscope:4040\"\n\t//comment + \ = \"Where to send collected profiles.\"\n}\n\n/********************************************\n + * EXPORTS\n ********************************************/\n\nexport \"metrics_receiver\" + {\n\tvalue = prometheus.remote_write.mimir.receiver\n}\n\nexport \"logs_receiver\" + {\n\tvalue = loki.write.loki.receiver\n}\n\nexport \"traces_receiver\" {\n\tvalue + = otelcol.exporter.otlp.tempo.input\n}\n\nexport \"profiles_receiver\" {\n\tvalue + = pyroscope.write.pyroscope.receiver\n}\n\n/********************************************\n + * Endpoints\n ********************************************/\n\n// Metrics\nprometheus.remote_write + \"mimir\" {\n\tendpoint {\n\t\turl = argument.metrics_endpoint.value + + \"/api/v1/push\"\n\t\tsend_native_histograms = true\n\t}\n\n\texternal_labels + = {\n\t\t\"scraped_by\" = \"grafana-agent\",\n\t\t\"cluster\" = 
argument.cluster.value,\n\t}\n}\n\n// + Logs\nloki.write \"loki\" {\n\tendpoint {\n\t\turl = argument.logs_endpoint.value + + \"/loki/api/v1/push\"\n\t\ttenant_id = argument.tenant.value\n\t}\n\n\texternal_labels + = {\n\t\t\"scraped_by\" = \"grafana-agent\",\n\t\t\"cluster\" = argument.cluster.value,\n\t}\n}\n\n// + Traces\notelcol.exporter.otlp \"tempo\" {\n\tclient {\n\t\tendpoint = argument.traces_endpoint.value\n\n\t\ttls + {\n\t\t\tinsecure = true\n\t\t\tinsecure_skip_verify = true\n\t\t}\n\t}\n}\n\n// + Profiles\npyroscope.write \"pyroscope\" {\n\tendpoint {\n\t\turl = argument.profiles_endpoint.value\n\t}\n\n\texternal_labels + = {\n\t\t\"scraped_by\" = \"grafana-agent\",\n\t\t\"cluster\" = argument.cluster.value,\n\t}\n}\n" + logs.river: "/*\nModule: logs\nDescription: Wrapper module to include all kubernetes + logging modules and use cri parsing\n*/\nargument \"forward_to\" {\n\t// comment + = \"Must be a list(LogsReceiver) where collected logs should be forwarded to\"\n\toptional + = false\n}\n\nargument \"tenant\" {\n\t// comment = \"The tenant to filter logs + to. 
This does not have to be the tenantId, this is the value to look for in the + logs.agent.grafana.com/tenant annotation, and this can be a regex.\"\n\toptional + = true\n\tdefault = \".*\"\n}\n\nargument \"keep_labels\" {\n\t// comment = \"List + of labels to keep before the log message is written to Loki\"\n\toptional = true\n\tdefault + \ = [\n\t\t\"app\",\n\t\t\"cluster\",\n\t\t\"component\",\n\t\t\"container\",\n\t\t\"deployment\",\n\t\t\"env\",\n\t\t\"filename\",\n\t\t\"instance\",\n\t\t\"job\",\n\t\t\"level\",\n\t\t\"log_type\",\n\t\t\"namespace\",\n\t\t\"region\",\n\t\t\"service\",\n\t\t\"squad\",\n\t\t\"team\",\n\t]\n}\n\nargument + \"git_repo\" {\n\toptional = true\n\tdefault = coalesce(env(\"GIT_REPO\"), \"https://github.com/grafana/agent-modules.git\")\n}\n\nargument + \"git_rev\" {\n\toptional = true\n\tdefault = coalesce(env(\"GIT_REV\"), env(\"GIT_REVISION\"), + env(\"GIT_BRANCH\"), \"main\")\n}\n\nargument \"git_pull_freq\" {\n\t// comment + = \"How often to pull the git repo, the default is 0s which means never pull\"\n\toptional + = true\n\tdefault = \"0s\"\n}\n\nmodule.git \"log_targets\" {\n\trepository = + argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/targets/logs-from-worker.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.log_formats_all.exports.process.receiver]\n\t\ttenant + \ = argument.tenant.value\n\t\tgit_repo = argument.git_repo.value\n\t\tgit_rev + \ = argument.git_rev.value\n\t\tgit_pull_freq = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git + \"log_formats_all\" {\n\trepository = argument.git_repo.value\n\trevision + \ = argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/log-formats/all.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.log_level_default.exports.process.receiver]\n\t\tgit_repo + \ = argument.git_repo.value\n\t\tgit_rev = 
argument.git_rev.value\n\t\tgit_pull_freq + = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git \"log_level_default\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/labels/log-level.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.label_normalize_filename.exports.process.receiver]\n\t}\n}\n\nmodule.git + \"label_normalize_filename\" {\n\trepository = argument.git_repo.value\n\trevision + \ = argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/labels/normalize-filename.river\"\n\n\targuments + {\n\t\t// here we fork, one branch goes to the log level module, the other goes + to the metrics module\n\t\t// this is because we need to reduce the labels on + the pre-metrics but they are still necessary in\n\t\t// downstream modules\n\t\tforward_to + = [\n\t\t\tmodule.git.pre_process_metrics.exports.process.receiver,\n\t\t\tmodule.git.drop_levels.exports.process.receiver,\n\t\t]\n\t}\n}\n\nmodule.git + \"pre_process_metrics\" {\n\trepository = argument.git_repo.value\n\trevision + \ = argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/metrics/pre-process-bytes-lines.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.drop_levels.exports.process.receiver]\n\t\tkeep_labels + = argument.keep_labels.value\n\t}\n}\n\nmodule.git \"drop_levels\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/drops/levels.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.scrub_all.exports.process.receiver]\n\t\tgit_repo + \ = argument.git_repo.value\n\t\tgit_rev = argument.git_rev.value\n\t\tgit_pull_freq + = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git \"scrub_all\" {\n\trepository + \ = 
argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/scrubs/all.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.embed_pod.exports.process.receiver]\n\t\tgit_repo + \ = argument.git_repo.value\n\t\tgit_rev = argument.git_rev.value\n\t\tgit_pull_freq + = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git \"embed_pod\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/embed/pod.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.mask_all.exports.process.receiver]\n\t}\n}\n\nmodule.git + \"mask_all\" {\n\trepository = argument.git_repo.value\n\trevision = + argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/masks/all.river\"\n\n\targuments {\n\t\tforward_to + \ = [module.git.label_keep.exports.process.receiver]\n\t\tgit_repo = argument.git_repo.value\n\t\tgit_rev + \ = argument.git_rev.value\n\t\tgit_pull_freq = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git + \"label_keep\" {\n\trepository = argument.git_repo.value\n\trevision = + argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/labels/keep-labels.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.post_process_metrics.exports.process.receiver]\n\t\tkeep_labels + = argument.keep_labels.value\n\t}\n}\n\nmodule.git \"post_process_metrics\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/metrics/post-process-bytes-lines.river\"\n\n\targuments + {\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n" + metrics.river: "/*\nModule: metrics-all\nDescription: Wrapper module to include + all kubernetes metric modules and use cri parsing\n*/\nargument 
\"forward_to\" + {\n\t// comment = \"Must be a list(MetricssReceiver) where collected logs should + be forwarded to\"\n\toptional = false\n}\n\nargument \"clustering\" {\n\t// comment + = \"Whether or not clustering should be enabled\"\n\toptional = true\n\tdefault + \ = false\n}\n\n/********************************************\n * Kubernetes Auto + Scrape ServiceMonitor\n ********************************************/\nprometheus.operator.servicemonitors + \"auto_scrape_servicemonitors\" {\n\tforward_to = argument.forward_to.value\n\n\tclustering + {\n\t\tenabled = argument.clustering.value\n\t}\n}\n\n/********************************************\n + * Kubernetes Auto Scrape PodMonitors\n ********************************************/\nprometheus.operator.podmonitors + \"auto_scrape_podmonitors\" {\n\tforward_to = argument.forward_to.value\n\n\tclustering + {\n\t\tenabled = argument.clustering.value\n\t}\n\n\tselector {\n\t\tmatch_expression + {\n\t\t\tkey = \"team\"\n\t\t\toperator = \"In\"\n\t\t\tvalues = [\"team-infra\"]\n\t\t}\n\t}\n}\n\n/********************************************\n + * Kubernetes Prometheus Rules To Mimir\n ********************************************/\nmimir.rules.kubernetes + \"prometheus_rules_to_mimir\" {\n\taddress = coalesce(env(\"METRICS_ENDPOINT\"), + \"http://nginx.gateway.svc:8080\")\n\ttenant_id = \"anonymous\"\n}\n" + profiles.river: "/*\nModule: profiles\nDescription: Wrapper module to include all + kubernetes profile modules and use cri parsing\n*/\nargument \"forward_to\" {\n\t// + comment = \"Must be a list(ProfilessReceiver) where collected logs should be forwarded + to\"\n\toptional = false\n}\n\nargument \"clustering\" {\n\t// comment = \"Whether + or not clustering should be enabled\"\n\toptional = true\n\tdefault = false\n}\n\ndiscovery.kubernetes + \"pyroscope_kubernetes\" {\n\trole = \"pod\"\n}\n\n// The default scrape config + allows to define annotations based scraping.\n//\n// For example the following + 
annotations:\n//\n// ```\n// profiles.grafana.com/memory.scrape: \"true\"\n// + profiles.grafana.com/memory.port: \"8080\"\n// profiles.grafana.com/cpu.scrape: + \"true\"\n// profiles.grafana.com/cpu.port: \"8080\"\n// profiles.grafana.com/goroutine.scrape: + \"true\"\n// profiles.grafana.com/goroutine.port: \"8080\"\n// ```\n//\n// will + scrape the `memory`, `cpu` and `goroutine` profiles from the `8080` port of the + pod.\n//\n// For more information see https://grafana.com/docs/phlare/latest/operators-guide/deploy-kubernetes/#optional-scrape-your-own-workloads-profiles\ndiscovery.relabel + \"kubernetes_pods\" {\n\ttargets = concat(discovery.kubernetes.pyroscope_kubernetes.targets)\n\n\trule + {\n\t\taction = \"drop\"\n\t\tsource_labels = [\"__meta_kubernetes_pod_phase\"]\n\t\tregex + \ = \"Pending|Succeeded|Failed|Completed\"\n\t}\n\n\trule {\n\t\taction + = \"labelmap\"\n\t\tregex = \"__meta_kubernetes_pod_label_(.+)\"\n\t}\n\n\trule + {\n\t\taction = \"replace\"\n\t\tsource_labels = [\"__meta_kubernetes_namespace\"]\n\t\ttarget_label + \ = \"namespace\"\n\t}\n\n\trule {\n\t\taction = \"replace\"\n\t\tsource_labels + = [\"__meta_kubernetes_pod_name\"]\n\t\ttarget_label = \"pod\"\n\t}\n\n\trule + {\n\t\taction = \"replace\"\n\t\tsource_labels = [\"__meta_kubernetes_pod_container_name\"]\n\t\ttarget_label + \ = \"container\"\n\t}\n}\n\ndiscovery.relabel \"kubernetes_pods_memory_default_name\" + {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = 
\"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_memory_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = 
\"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Memory\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_memory\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_memory_default_name.output, discovery.relabel.kubernetes_pods_memory_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = true\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_cpu_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = 
\"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_cpu_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape CPU\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_cpu\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_cpu_default_name.output, discovery.relabel.kubernetes_pods_cpu_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.goroutine + 
{\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_goroutine_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_goroutine_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = 
\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Goroutine\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_goroutine\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_goroutine_default_name.output, + discovery.relabel.kubernetes_pods_goroutine_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_block_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule 
{\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_block_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + 
\ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Block\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_block\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_block_default_name.output, discovery.relabel.kubernetes_pods_block_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_mutex_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = 
\"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_mutex_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Mutex\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_mutex\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = 
concat(discovery.relabel.kubernetes_pods_mutex_default_name.output, discovery.relabel.kubernetes_pods_mutex_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_fgprof_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_fgprof_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scrape\"]\n\t\taction + \ = 
\"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Fgprof\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_fgprof\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_fgprof_default_name.output, discovery.relabel.kubernetes_pods_fgprof_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = true\n\t\t}\n\t}\n}\n" + traces.river: "/*\nModule: 
traces\n*/\n\n/********************************************\n + * ARGUMENTS\n ********************************************/\nargument \"traces_forward_to\" + {\n\toptional = false\n}\n\nargument \"logs_forward_to\" {\n\toptional = false\n}\n\nargument + \"metrics_forward_to\" {\n\toptional = false\n}\n\nargument \"cluster\" {\n\toptional + = true\n\tdefault = \"k3d-k3s-codelab\"\n}\n\nargument \"otlp_http_endpoint\" + {\n\toptional = true\n\tdefault = \"0.0.0.0:4318\"\n}\n\nargument \"otlp_grpc_endpoint\" + {\n\toptional = true\n\tdefault = \"0.0.0.0:4317\"\n}\n\n/********************************************\n + * EXPORTS\n ********************************************/\nexport \"agent_traces_input\" + {\n\tvalue = otelcol.processor.batch.default.input\n}\n\n/********************************************\n + * Jaeger for Metrics Logs Traces\n ********************************************/\n\notelcol.receiver.jaeger + \"default\" {\n\tprotocols {\n\t\tgrpc {\n\t\t\tendpoint = \"0.0.0.0:14250\"\n\t\t}\n\n\t\tthrift_http + {\n\t\t\tendpoint = \"0.0.0.0:14268\"\n\t\t}\n\n\t\tthrift_binary {\n\t\t\tendpoint + = \"0.0.0.0:6832\"\n\t\t}\n\n\t\tthrift_compact {\n\t\t\tendpoint = \"0.0.0.0:6831\"\n\t\t}\n\t}\n\n\toutput + {\n\t\tmetrics = [otelcol.processor.batch.default.input]\n\t\tlogs = [otelcol.processor.resourcedetection.default.input]\n\t\ttraces + \ = [otelcol.processor.resourcedetection.default.input]\n\t}\n}\n\n/********************************************\n + * Otelcol for Metrics Logs Traces\n ********************************************/\n// + https://grafana.com/docs/agent/latest/flow/reference/components/otelcol.receiver.otlp/\notelcol.receiver.otlp + \"default\" {\n\tgrpc {\n\t\tendpoint = argument.otlp_grpc_endpoint.value\n\t}\n\n\thttp + {\n\t\tendpoint = argument.otlp_http_endpoint.value\n\t}\n\n\toutput {\n\t\tmetrics + = [otelcol.processor.batch.default.input]\n\t\tlogs = [otelcol.processor.resourcedetection.default.input]\n\t\ttraces + \ = 
[\n\t\t\totelcol.processor.resourcedetection.default.input,\n\t\t\totelcol.connector.spanlogs.autologging.input,\n\t\t]\n\t}\n}\n\notelcol.processor.resourcedetection + \"default\" {\n\tdetectors = [\"env\"]\n\n\toutput {\n\t\tlogs = [otelcol.processor.k8sattributes.default.input]\n\t\ttraces + = [otelcol.processor.k8sattributes.default.input]\n\t}\n}\n\notelcol.processor.k8sattributes + \"default\" {\n\textract {\n\t\tmetadata = [\n\t\t\t\"k8s.namespace.name\",\n\t\t\t\"k8s.pod.name\",\n\t\t\t\"k8s.deployment.name\",\n\t\t\t\"k8s.statefulset.name\",\n\t\t\t\"k8s.daemonset.name\",\n\t\t\t\"k8s.cronjob.name\",\n\t\t\t\"k8s.job.name\",\n\t\t\t\"k8s.node.name\",\n\t\t\t\"k8s.pod.uid\",\n\t\t\t\"k8s.pod.start_time\",\n\t\t]\n\t}\n\n\tpod_association + {\n\t\tsource {\n\t\t\tfrom = \"connection\"\n\t\t}\n\t}\n\n\toutput {\n\t\tlogs + \ = [otelcol.processor.transform.add_resource_attributes.input]\n\t\ttraces = + [otelcol.processor.transform.add_resource_attributes.input]\n\t}\n}\n\notelcol.processor.transform + \"add_resource_attributes\" {\n\terror_mode = \"ignore\"\n\n\tlog_statements {\n\t\tcontext + \ = \"resource\"\n\t\tstatements = [\n\t\t\t`set(attributes[\"pod\"], attributes[\"k8s.pod.name\"])`,\n\t\t\t`set(attributes[\"namespace\"], + attributes[\"k8s.namespace.name\"])`,\n\t\t\t`set(attributes[\"loki.resource.labels\"], + \"pod, namespace, cluster, job\")`,\n\t\t\t`set(attributes[\"k8s.cluster.name\"], + \"k3d-k3s-codelab\") where attributes[\"k8s.cluster.name\"] == nil`,\n\t\t]\n\t}\n\n\ttrace_statements + {\n\t\tcontext = \"resource\"\n\t\tstatements = [\n\t\t\t`set(attributes[\"k8s.cluster.name\"], + \"k3d-k3s-codelab\") where attributes[\"k8s.cluster.name\"] == nil`,\n\t\t]\n\t}\n\n\toutput + {\n\t\tlogs = [otelcol.processor.filter.default.input]\n\t\ttraces = [otelcol.processor.filter.default.input]\n\t}\n}\n\notelcol.processor.filter + \"default\" {\n\terror_mode = \"ignore\"\n\n\toutput {\n\t\tlogs = [otelcol.processor.batch.default.input]\n\t\ttraces + 
= [otelcol.processor.batch.default.input]\n\t}\n}\n\notelcol.processor.batch \"default\" + {\n\tsend_batch_size = 16384\n\tsend_batch_max_size = 0\n\ttimeout = + \"5s\"\n\n\toutput {\n\t\tmetrics = [otelcol.processor.memory_limiter.default.input]\n\t\tlogs + \ = [otelcol.processor.memory_limiter.default.input]\n\t\ttraces = [otelcol.processor.memory_limiter.default.input]\n\t}\n}\n\notelcol.processor.memory_limiter + \"default\" {\n\tcheck_interval = \"1s\"\n\tlimit_percentage = 50\n\tspike_limit_percentage + = 30\n\n\toutput {\n\t\tmetrics = [otelcol.exporter.prometheus.tracesmetrics.input]\n\t\tlogs + \ = [otelcol.exporter.loki.traceslogs.input]\n\t\ttraces = argument.traces_forward_to.value\n\t}\n}\n\notelcol.exporter.prometheus + \"tracesmetrics\" {\n\tforward_to = argument.metrics_forward_to.value\n}\n\notelcol.exporter.loki + \"traceslogs\" {\n\tforward_to = [loki.process.traceslogs.receiver]\n}\n\n// The + OpenTelemetry spanlog connector processes incoming trace spans and extracts data + from them ready\n// for logging.\notelcol.connector.spanlogs \"autologging\" {\n\t// + We only want to output a line for each root span (ie. 
every single trace), and + not for every\n\t// process or span (outputting a line for every span would be + extremely verbose).\n\tspans = false\n\troots = true\n\tprocesses = false\n\n\t// + We want to ensure that the following three span attributes are included in the + log line, if present.\n\tspan_attributes = [\n\t\t\"http.method\",\n\t\t\"http.target\",\n\t\t\"http.status_code\",\n\t]\n\n\t// + Overrides the default key in the log line to be `traceId`, which is then used + by Grafana to\n\t// identify the trace ID for correlation with the Tempo datasource.\n\toverrides + {\n\t\ttrace_id_key = \"traceId\"\n\t}\n\n\t// Send to the OpenTelemetry Loki + exporter.\n\toutput {\n\t\tlogs = [otelcol.exporter.loki.autologging.input]\n\t}\n}\n\n// + Simply forwards the incoming OpenTelemetry log format out as a Loki log.\n// We + need this stage to ensure we can then process the logline as a Loki object.\notelcol.exporter.loki + \"autologging\" {\n\tforward_to = [loki.process.autologging.receiver]\n}\n\n// + The Loki processor allows us to accept a correctly formatted Loki log and mutate + it into\n// a set of fields for output.\nloki.process \"autologging\" {\n\t// + The JSON stage simply extracts the `body` (the actual logline) from the Loki log, + ignoring\n\t// all other fields.\n\tstage.json {\n\t\texpressions = {\"body\" + = \"\"}\n\t}\n\t// The output stage takes the body (the main logline) and uses + this as the source for the output\n\t// logline. 
In this case, it essentially + turns it into logfmt.\n\tstage.output {\n\t\tsource = \"body\"\n\t}\n\n\tforward_to + = [loki.process.traceslogs.receiver]\n}\n\nloki.process \"traceslogs\" {\n\tstage.tenant + {\n\t\tvalue = \"anonymous\"\n\t}\n\n\tforward_to = argument.logs_forward_to.value\n}\n" kind: ConfigMap metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-compactor.json + name: agent-modules-cf8t5bf7t9 namespace: monitoring-system --- apiVersion: v1 @@ -10459,33175 +861,6 @@ metadata: namespace: monitoring-system --- apiVersion: v1 -data: - mimir-config.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "instances" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "count(cortex_config_hash{cluster=~\"$cluster\", namespace=~\"$namespace\"}) by (sha256)", - "format": "time_series", - "legendFormat": "sha256:{{sha256}}", - "legendLink": null - } - ], - "title": "Startup config file 
hashes", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Startup config file", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "instances" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "count(cortex_runtime_config_hash{cluster=~\"$cluster\", namespace=~\"$namespace\"}) by (sha256)", - "format": "time_series", - "legendFormat": "sha256:{{sha256}}", - "legendLink": null - } - ], - "title": "Runtime config file hashes", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Runtime config file", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": 
"query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Config", - "uid": "5d9d0b4724c0f80d68467088ec61e003", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-config.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-object-store.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - 
"thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(component) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{component}}", - "legendLink": null - } - ], - "title": "RPS / component", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(component) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) / sum by(component) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{component}}", - "legendLink": null - } - ], - "title": "Error rate / component", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Components", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { 
- "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "RPS / operation", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Error rate / operation", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Operations", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": 
"histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: Get", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", 
namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: GetRange", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": 
"time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", 
namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": 
"time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: Upload", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, 
sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - 
"hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Object Store", - "uid": "e1324ee2a434f4158c00a9ee279d3292", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-object-store.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-overrides.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "${datasource}", - "id": 1, - "span": 12, - "targets": [ - { - "expr": "max by(limit_name) (cortex_limits_defaults{cluster=~\"$cluster\",namespace=~\"$namespace\"})", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "title": "Defaults", - "transformations": [ - { - "id": "labelsToFields", - "options": { } - }, - { - "id": "merge", - "options": { } - }, - { - "id": "organize", - "options": { - "excludeByName": { 
- "Time": true - }, - "indexByName": { - "Value": 1, - "limit_name": 0 - } - } - }, - { - "id": "sortBy", - "options": { - "fields": { }, - "sort": [ - { - "field": "limit_name" - } - ] - } - } - ], - "type": "table" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "${datasource}", - "id": 2, - "span": 12, - "targets": [ - { - "expr": "max by(user, limit_name) (cortex_limits_overrides{cluster=~\"$cluster\",namespace=~\"$namespace\",user=~\"${tenant_id}\"})", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "title": "Per-tenant overrides", - "transformations": [ - { - "id": "labelsToFields", - "options": { - "mode": "columns", - "valueLabel": "limit_name" - } - }, - { - "id": "merge", - "options": { } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true - }, - "indexByName": { - "user": 0 - } - } - } - ], - "type": "table" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": 
"prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "current": { - "selected": true, - "text": ".*", - "value": ".*" - }, - "hide": 0, - "label": "Tenant ID", - "name": "tenant_id", - "options": [ - { - "selected": true, - "text": ".*", - "value": ".*" - } - ], - "query": ".*", - "type": "textbox" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Overrides", - "uid": "1e2c358600ac53f09faea133f811b5bb", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-overrides.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-overview-networking.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - 
"fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": 
"none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": 
"min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Writes", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - 
{ - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - 
"overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Reads", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Backend", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": 
"Mimir / Overview networking", - "uid": "e15c71d372cc541367a088f10d9fcd92", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-overview-networking.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-overview-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - 
"stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Writes", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - 
"min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"distributor|ingester|mimir-write\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"distributor|ingester|mimir-write\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - 
"title": "Disk reads", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(distributor|ingester|mimir-write).*\"\n }\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - 
"expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Reads", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max 
by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Backend", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - 
"tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk reads", - "type": 
"timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"\n }\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": 
"label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Overview resources", - "uid": "a9b92d3c4d1af325d872a9e9a7083d71", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-overview-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-overview.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "content": "The 'Status' panel shows an overview on the cluster health over the time.\nTo investigate failures, see a specific 
dashboard:\n\n- Writes\n- Reads\n- Rule evaluations\n- Alerting notifications\n- Object storage\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 3, - "title": "", - "transparent": true, - "type": "text" - }, - { - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#7EB26D", - "value": null - }, - { - "color": "#EAB839", - "value": 0.01 - }, - { - "color": "#E24D42", - "value": 0.050000000000000003 - } - ] - } - } - }, - "id": 2, - "options": { - "showValue": "never" - }, - "span": 6, - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "exemplar": false, - "expr": "(\n # gRPC errors are not tracked as 5xx but \"error\".\n sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"5.*|error\"}[$__rate_interval]))\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", - "instant": false, - "legendFormat": "Writes", - "range": true - }, - { - "datasource": { - "uid": "$datasource" - }, - "exemplar": false, - "expr": "(\n sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"5.*\"}[$__rate_interval]))\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", 
route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", - "instant": false, - "legendFormat": "Reads", - "range": true - }, - { - "datasource": { - "uid": "$datasource" - }, - "exemplar": false, - "expr": "(\n (\n sum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n +\n # Consider missed evaluations as failures.\n sum(rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n )\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "instant": false, - "legendFormat": "Rule evaluations", - "range": true - }, - { - "datasource": { - "uid": "$datasource" - }, - "exemplar": false, - "expr": "(\n # Failed notifications from ruler to Alertmanager (handling the case the ruler metrics are missing).\n ((sum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n) or vector(0))\n +\n # Failed notifications from Alertmanager to receivers (handling the case the alertmanager metrics are missing).\n ((sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n) or vector(0))\n)\n/\n(\n # Total notifications from ruler to Alertmanager (handling the case the ruler metrics are missing).\n ((sum(rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n) or vector(0))\n +\n # Total notifications from Alertmanager to receivers (handling the case the alertmanager metrics 
are missing).\n ((sum(cluster_job_integration:cortex_alertmanager_notifications_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n) or vector(0))\n)\n", - "instant": false, - "legendFormat": "Alerting notifications", - "range": true - }, - { - "datasource": { - "uid": "$datasource" - }, - "exemplar": false, - "expr": "sum(rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n/\nsum(rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n", - "instant": false, - "legendFormat": "Object storage", - "range": true - } - ], - "title": "Status", - "type": "state-timeline" - }, - { - "id": 3, - "options": { - "alertInstanceLabelFilter": "cluster=~\"$cluster\", namespace=~\"$namespace\"", - "alertName": "Mimir", - "dashboardAlerts": false, - "maxItems": 100, - "sortOrder": 3, - "stateFilter": { - "error": true, - "firing": true, - "noData": false, - "normal": false, - "pending": false - } - }, - "span": 3, - "title": "Firing alerts", - "type": "alertlist" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Mimir cluster health", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "content": "These panels show an overview on the write path. 
\nTo examine the write path in detail, see a specific dashboard:\n\n- Writes\n- Writes resources\n- Writes networking\n- Overview resources\n- Overview networking\n", - "datasource": null, - "description": "", - "id": 4, - "mode": "markdown", - "span": 3, - "title": "", - "transparent": true, - "type": "text" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Write requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th 
percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Write latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "cps" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_samples:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "samples / sec", - "legendLink": null - }, - { - "expr": 
"sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "exemplars / sec", - "legendLink": null - } - ], - "title": "Ingestion / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Writes", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "content": "These panels show an overview on the read path. \nTo examine the read path in detail, see a specific dashboard:\n\n- Reads\n- Reads resources\n- Reads networking\n- Overview resources\n- Overview networking\n- Queries\n- Compactor\n", - "datasource": null, - "description": "", - "id": 8, - "mode": "markdown", - "span": 3, - "title": "", - "transparent": true, - "type": "text" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - 
}, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Read requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 10, - 
"links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Read latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, 
- "seriesOverrides": [ - { - "alias": "instant queries", - "color": "#429D48" - }, - { - "alias": "range queries", - "color": "#F1C731" - }, - { - "alias": "\"label names\" queries", - "color": "#2A66CF" - }, - { - "alias": "\"label values\" queries", - "color": "#9E44C1" - }, - { - "alias": "series queries", - "color": "#FFAB57" - }, - { - "alias": "remote read queries", - "color": "#C79424" - }, - { - "alias": "metadata queries", - "color": "#84D586" - }, - { - "alias": "exemplar queries", - "color": "#A1C4FC" - }, - { - "alias": "\"active series\" queries", - "color": "#C788DE" - } - ], - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_query\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "instant queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_query_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "range queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_labels\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "\"label names\" queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_label_name_values\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "\"label values\" queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_series\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "series queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_read\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "remote read queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_metadata\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "metadata queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_query_exemplars\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "exemplar queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=\"prometheus_api_v1_cardinality_active_series\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "\"active series\" queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=\"prometheus_api_v1_cardinality_label_names\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "\"label name cardinality\" queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=\"prometheus_api_v1_cardinality_label_values\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "\"label value cardinality\" queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_.*\",route!~\".*(query|query_range|label.*|series|read|metadata|query_exemplars|cardinality_.*)\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "other", - "legendLink": null - } - ], - "title": "Queries / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Reads", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "content": "These panels show an overview on the recording and alerting rules evaluation.\nTo examine the rules evaluation and alerts notifications in detail, see a specific dashboard:\n\n- Ruler\n- Alertmanager\n- Alertmanager resources\n- Overview resources\n- Overview networking\n", - "datasource": null, - "description": "", - "id": 12, - "mode": "markdown", - "span": 3, - "title": "", - "transparent": true, - "type": "text" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - 
"id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "success", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "missed", - "legendLink": null - } - ], - "title": "Rule evaluations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum (rate(cortex_prometheus_rule_evaluation_duration_seconds_sum{cluster=~\"$cluster\", 
job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum (rate(cortex_prometheus_rule_evaluation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "average", - "legendLink": null - } - ], - "title": "Rule evaluations latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n -\nsum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "failed", - 
"legendLink": null - } - ], - "title": "Alerting notifications sent to Alertmanager / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Recording and alerting rules", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "content": "These panels show an overview on the long-term storage (object storage).\nTo examine the storage in detail, see a specific dashboard:\n\n- Object store\n- Compactor\n", - "datasource": null, - "description": "", - "id": 16, - "mode": "markdown", - "span": 3, - "title": "", - "transparent": true, - "type": "text" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n-\nsum(rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": 
"sum(rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "seriesOverrides": [ - { - "alias": "attributes", - "color": "#429D48" - }, - { - "alias": "delete", - "color": "#F1C731" - }, - { - "alias": "exists", - "color": "#2A66CF" - }, - { - "alias": "get", - "color": "#9E44C1" - }, - { - "alias": "get_range", - "color": "#FFAB57" - }, - { - "alias": "iter", - "color": "#C79424" - }, - { - "alias": "upload", - "color": "#84D586" - } - ], - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - 
"tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(max by(user) (max_over_time(cortex_bucket_blocks_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[15m])))", - "format": "time_series", - "legendFormat": "blocks", - "legendLink": null - } - ], - "title": "Total number of blocks in the storage", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Long-term storage (object storage)", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", 
- "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Overview", - "uid": "ffcd83628d7d4b5a03d1cafd159e6c9c", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-overview.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-queries.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, 
sum(rate(cortex_query_frontend_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_query_frontend_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_frontend_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Queue duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_query_frontend_retries_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, 
sum(rate(cortex_query_frontend_retries_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_query_frontend_retries_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_query_frontend_retries_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Retries", - "type": "timeseries", - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (cortex_query_frontend_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Queue length (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - 
"showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(user) (cortex_query_frontend_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}) > 0", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "Queue length (per user)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Queue duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Queue length (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": 
"A", - "mode": "none" - } - }, - "min": 0, - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(user) (cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}) > 0", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "Queue length (per user)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Intervals per query\nThe average number of split queries (partitioned by time) executed a single input query.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_frontend_split_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) / sum(rate(cortex_frontend_query_range_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", method=\"split_by_interval_and_results_cache\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": 
"splitting rate", - "legendLink": null - } - ], - "title": "Intervals per query", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "# Query the new metric introduced in Mimir 2.10.\n(\n sum by(request_type) (rate(cortex_frontend_query_result_cache_hits_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))\n /\n sum by(request_type) (rate(cortex_frontend_query_result_cache_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))\n)\n# Otherwise fallback to the previous general-purpose metrics.\nor\n(\n label_replace(\n # Query metrics before and after dskit cache refactor.\n sum (\n rate(thanos_cache_memcached_hits_total{name=\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_hits_total{name=\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n )\n /\n sum (\n rate(thanos_cache_memcached_requests_total{name=~\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_requests_total{name=~\"frontend-cache\", cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n ),\n \"request_type\", \"query_range\", \"\", \"\")\n)\n", - "format": "time_series", - "legendFormat": "{{request_type}}", - "legendLink": null - } - ], - "title": "Query results cache hit ratio", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Query results cache skipped\nThe % of queries whose results could not be cached.\nIt is tracked for each split query when the splitting by interval is enabled.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_frontend_query_result_cache_skipped_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (reason) /\nignoring (reason) group_left sum(rate(cortex_frontend_query_result_cache_attempted_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "{{reason}}", - "legendLink": null - } - ], - "title": "Query results cache skipped", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend - query splitting and results cache", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Sharded queries ratio\nThe % of queries that 
have been successfully rewritten and executed in a shardable way.\nThis panel only takes into account the type of queries that are supported by query sharding (eg. range queries).\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_frontend_query_sharding_rewrites_succeeded_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) /\nsum(rate(cortex_frontend_query_sharding_rewrites_attempted_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "sharded queries ratio", - "legendLink": null - } - ], - "title": "Sharded queries ratio", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Number of sharded queries per query\nThe number of sharded queries that have been executed for a single input query. 
It only tracks queries that\nhave been successfully rewritten in a shardable way.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_frontend_sharded_queries_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_frontend_sharded_queries_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_frontend_sharded_queries_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_frontend_sharded_queries_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Number of sharded queries per query", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": 
false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend - query sharding", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:cortex_ingester_queried_series_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job:cortex_ingester_queried_series_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1 * sum(cluster_job:cortex_ingester_queried_series_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}) / sum(cluster_job:cortex_ingester_queried_series_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Series per query", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - 
"fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:cortex_ingester_queried_samples_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job:cortex_ingester_queried_samples_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1 * sum(cluster_job:cortex_ingester_queried_samples_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}) / sum(cluster_job:cortex_ingester_queried_samples_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Samples per query", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "nullPointMode": "null as zero", - "options": 
{ - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:cortex_ingester_queried_exemplars_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job:cortex_ingester_queried_exemplars_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1 * sum(cluster_job:cortex_ingester_queried_exemplars_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}) / sum(cluster_job:cortex_ingester_queried_exemplars_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Exemplars per query", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - 
"expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_querier_storegateway_instances_hit_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_instances_hit_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Number of store-gateways hit per query", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": 
"histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_querier_storegateway_refetches_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_refetches_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Refetches of missing blocks per query", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Consistency checks failed\nRate of queries that had to run with consistency checks and those checks failed. 
A failed consistency check means that some of at least one block which had to be queried wasn't present in any of the store-gateways.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Failure Rate" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_querier_blocks_consistency_checks_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) / sum(rate(cortex_querier_blocks_consistency_checks_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Failure Rate", - "legendLink": null - } - ], - "title": "Consistency checks failed", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Rejected queries\nThe proportion of all queries received by queriers that were rejected for some reason.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - 
"tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_querier_queries_rejected_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) / ignoring (reason) group_left sum(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_query(_range)?\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{reason}}", - "legendLink": null - } - ], - "title": "Rejected queries", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max(cortex_bucket_index_loaded{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"})", - "format": "time_series", - "legendFormat": "Max", - "legendLink": null - }, - { - "expr": "min(cortex_bucket_index_loaded{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"})", - "format": "time_series", - "legendFormat": "Min", - "legendLink": null - }, - { - "expr": "avg(cortex_bucket_index_loaded{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"})", - "format": 
"time_series", - "legendFormat": "Average", - "legendLink": null - } - ], - "title": "Bucket indexes loaded (per querier)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_bucket_index_loads_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) - sum(rate(cortex_bucket_index_load_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_bucket_index_load_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Bucket indexes load / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - 
"spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_bucket_index_load_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_bucket_index_load_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_bucket_index_load_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_bucket_index_load_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Bucket indexes load latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": 
{ - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_bucket_store_series_blocks_queried_sum{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "blocks", - "legendLink": null - } - ], - "title": "Blocks queried / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "binBps" - }, - "overrides": [ ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(data_type) (\n # Exclude \"chunks refetched\".\n rate(cortex_bucket_store_series_data_size_fetched_bytes_sum{component=\"store-gateway\", stage!=\"refetched\", cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "{{data_type}}", - "legendLink": null - } - ], - "title": "Data fetched / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 
100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "binBps" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(data_type) (\n # Exclude \"chunks processed\" to only count \"chunks returned\", other than postings and series.\n rate(cortex_bucket_store_series_data_size_touched_bytes_sum{component=\"store-gateway\", stage!=\"processed\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "{{data_type}}", - "legendLink": null - } - ], - "title": "Data touched / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Store-gateway", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 26, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(stage) (rate(cortex_bucket_store_series_request_stage_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum by(stage) 
(rate(cortex_bucket_store_series_request_stage_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "{{stage}}", - "legendLink": null - } - ], - "title": "Series request average latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 27, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by(stage, le) (rate(cortex_bucket_store_series_request_stage_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])))\n", - "format": "time_series", - "legendFormat": "{{stage}}", - "legendLink": null - } - ], - "title": "Series request 99th percentile latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Series batch preloading efficiency\nThis panel shows the % of time reduced by preloading, for Series() requests which have been\nsplit to 2+ batches. 
If a Series() request is served within a single batch, then preloading\nis not triggered, and thus not counted in this measurement.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 28, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "# Clamping min to 0 because if preloading not useful at all, then the actual value we get is\n# slightly negative because of the small overhead introduced by preloading.\nclamp_min(1 - (\n sum(rate(cortex_bucket_store_series_batch_preloading_wait_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\n sum(rate(cortex_bucket_store_series_batch_preloading_load_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n), 0)\n", - "format": "time_series", - "legendFormat": "% of time reduced by preloading", - "legendLink": null - } - ], - "title": "Series batch preloading efficiency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Blocks currently owned\nThis panel shows the number of blocks owned by each store-gateway replica.\nFor each owned block, the store-gateway keeps its index-header on disk, and\neventually loaded in memory (if index-header lazy loading is disabled, or lazy loading\nis 
enabled and the index-header was loaded).\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 29, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "cortex_bucket_store_blocks_loaded{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Blocks currently owned", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 30, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_bucket_store_block_loads_total{component=\"store-gateway\",cluster=~\"$cluster\", 
job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) - sum(rate(cortex_bucket_store_block_load_failures_total{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_bucket_store_block_load_failures_total{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Blocks loaded / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 31, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_bucket_store_block_drops_total{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) - sum(rate(cortex_bucket_store_block_drop_failures_total{component=\"store-gateway\",cluster=~\"$cluster\", 
job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_bucket_store_block_drop_failures_total{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Blocks dropped / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 32, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "cortex_bucket_store_indexheader_lazy_load_total{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"} - cortex_bucket_store_indexheader_lazy_unload_total{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Lazy loaded index-headers", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - 
"mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 33, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Index-header lazy load duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Index-header lazy load gate latency\nTime spent waiting for a turn to load an index header. 
This time is not included in \"Index-header lazy load duration.\"\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 34, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_bucket_stores_gate_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_bucket_stores_gate_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_bucket_stores_gate_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_bucket_stores_gate_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Index-header lazy load gate latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": 
"short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 35, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_bucket_store_series_hash_cache_hits_total{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum(rate(cortex_bucket_store_series_hash_cache_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "hit ratio", - "legendLink": null - } - ], - "title": "Series hash cache hit ratio", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 36, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": 
"sum(rate(thanos_store_index_cache_hits_total{item_type=\"ExpandedPostings\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum(rate(thanos_store_index_cache_requests_total{item_type=\"ExpandedPostings\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "hit ratio", - "legendLink": null - } - ], - "title": "ExpandedPostings cache hit ratio", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 37, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_cache_memory_hits_total{name=\"chunks-attributes-cache\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum(rate(cortex_cache_memory_requests_total{name=\"chunks-attributes-cache\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "hit ratio", - "legendLink": null - } - ], - "title": "Chunks attributes in-memory cache hit ratio", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": 
"default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Queries", - "uid": "b3abe8d5c040395cc36615cb4334c92d", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-queries.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-reads-networking.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - 
"links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": 
true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Summary", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": 
"{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"})", - "format": "time_series", - "legendFormat": "highest", - 
"legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": 
"Bps" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - 
"span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler", - 
"titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - 
"fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": 
"min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) 
(rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - 
"expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Store-gateway", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": 
"never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": 
"never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ruler", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - 
"allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Reads networking", - "uid": "54b2a0a4748b3bd1aefa92ce5559a1c2", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-reads-networking.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-reads-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - 
"steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": 
"multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Summary", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - 
"legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - 
"legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - 
"matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - 
"overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - 
"showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - 
"legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - 
"expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - 
"showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": 
"min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (RSS)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"ingester\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Rules", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", 
- "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ruler", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - 
"unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - 
"showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - 
{ - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Store-gateway", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - 
"unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (RSS)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - 
}, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"store-gateway\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 26, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"store-gateway\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk reads", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { 
- "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 27, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(store-gateway).*\"\n }\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": 
false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Reads resources", - "uid": "cc86fd5aa9301c6528986572ad974db9", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-reads-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-reads.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "175px", - "panels": [ - { - "content": "

\n This dashboard shows health metrics for the read path.\n It is broken into sections for each service on the read path, and organized by the order in which the read request flows.\n
\n Incoming queries travel from the gateway → query frontend → query scheduler → querier → ingester and/or store-gateway (depending on the time range of the query).\n
\n For each service, there are 3 panels showing (1) requests per second to that service, (2) average, median, and p99 latency of requests to that service, and (3) p99 latency of requests to each instance of that service.\n

\n

\n The dashboard also shows metrics for the 4 optional caches that can be deployed:\n the query results cache, the metadata cache, the chunks cache, and the index cache.\n
\n These panels will show “no data” if the caches are not deployed.\n

\n

\n Lastly, it also includes metrics for how the ingester and store-gateway interact with object storage.\n

\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 12, - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Reads dashboard description", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "100px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Instant queries per second\nRate of instant queries per second being made to the system.\nIncludes both queries made to the /prometheus API as\nwell as queries from the ruler.\n\n", - "fill": 1, - "format": "reqps", - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(\n rate(\n cortex_request_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",\n route=~\"(prometheus|api_prom)_api_v1_query\"\n }[$__rate_interval]\n )\n or\n rate(\n cortex_prometheus_rule_evaluations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Instant queries / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - 
"label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Range queries per second\nRate of range queries per second being made to\nMimir via the /prometheus API.\n\n", - "fill": 1, - "format": "reqps", - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_query_range\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Range queries / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### \"Label names\" queries per second\nRate of \"label names\" endpoint queries per second being made to\nMimir 
via the /prometheus API.\n\n", - "fill": 1, - "format": "reqps", - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_labels\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Label names queries / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### \"Label values\" queries per second\nRate of specific \"label values\" endpoint queries per second being made to\nMimir via the /prometheus API.\n\n", - "fill": 1, - "format": "reqps", - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - 
"seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_label_name_values\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Label values queries / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Series queries per second\nRate of series queries per second being made to\nMimir via the /prometheus API.\n\n", - "fill": 1, - "format": "reqps", - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_series\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": 
"70,80", - "timeFrom": null, - "timeShift": null, - "title": "Series queries / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - 
"matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { 
- "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Requests / sec\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 10, - "links": [ ], - "options": { - 
"legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Latency (Time in Queue)\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (Time in Queue)", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Queue length\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "queries" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "sum(min_over_time(cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__interval]))", - "format": "time_series", - "legendFormat": "Queue length", - "legendLink": null - } - ], - "title": "Queue length", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### 99th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "99th Percentile: {{ additional_queue_dimensions }}", - "refId": "A" - } - ], - "title": "99th Percentile Latency by Queue Dimension", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### 50th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "50th Percentile: {{ additional_queue_dimensions }}", - "refId": "A" - } - ], - "title": "50th Percentile Latency by Queue Dimension", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Average Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (additional_queue_dimensions) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (additional_queue_dimensions), \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "Average: {{ additional_queue_dimensions }}", - "refId": "C" - } - ], - "title": "Average Latency by Queue Dimension", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler Latency (Time in Queue) Breakout by Additional Queue Dimensions", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - 
"tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum (\n rate(thanos_memcached_operations_total{name=\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{name=\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "Requests/s", - "legendLink": null - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Cache – query results", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": 
"absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_querier_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / 
sum(cluster_job_route:cortex_querier_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_querier_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 
* sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - }, - { - 
"collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": 
"fixed" - } - } - ] - } - ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",route=~\"/gatewaypb.StoreGateway/.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - 
"refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 26, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Store-gateway", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - 
"stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 27, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n 
label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 28, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval])) * 1e3 / 
sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Store-gateway – key-value store for store-gateways ring", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 29, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(operation) (\n # Backwards compatibility\n rate(\n thanos_memcached_operations_total{\n component=\"store-gateway\",\n name=\"index-cache\",\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n or ignoring(backend)\n rate(\n thanos_cache_operations_total{\n component=\"store-gateway\",\n name=\"index-cache\",\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": 
"Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 30, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - 
"intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (getmulti)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Hit ratio\nEven if you do not set up memcached for the blocks index cache, you will still see data in this panel because the store-gateway by default has an\nin-memory blocks index cache.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 31, - 
"links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(item_type) (\n rate(\n thanos_store_index_cache_hits_total{\n component=\"store-gateway\",\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n/\nsum by(item_type) (\n rate(\n thanos_store_index_cache_requests_total{\n component=\"store-gateway\",\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n", - "format": "time_series", - "legendFormat": "{{item_type}}", - "legendLink": null - } - ], - "title": "Hit ratio", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memcached – block index cache (store-gateway accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 32, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(operation) (\n # Backwards compatibility\n rate(thanos_memcached_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{\n cluster=~\"$cluster\", 
job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 33, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n 
cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (getmulti)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - 
}, - "overrides": [ ] - }, - "id": 34, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_cache_memcached_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n)\n/\nsum(\n # Backwards compatibility\n rate(thanos_cache_memcached_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "items", - "legendLink": null - } - ], - "title": "Hit ratio", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memcached – chunks cache (store-gateway accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 35, - "links": [ ], - "options": { - "legend": { - "showLegend": 
true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(operation) (\n # Backwards compatibility\n rate(thanos_memcached_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 36, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - 
"intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n)\n", - "format": "time_series", - 
"intervalFactor": 2, - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (getmulti)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 37, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_cache_memcached_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n/\nsum(\n # Backwards compatibility\n rate(thanos_cache_memcached_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "items", - "legendLink": null - } - ], - "title": "Hit ratio", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memcached – metadata cache (store-gateway accesses)", - "titleSize": "h6" - }, - { - 
"collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 38, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(operation) (\n # Backwards compatibility\n rate(thanos_memcached_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 39, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", 
job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n 
component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (getmulti)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 40, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_cache_memcached_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n/\nsum(\n # Backwards compatibility\n rate(thanos_cache_memcached_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - 
"legendFormat": "items", - "legendLink": null - } - ], - "title": "Hit ratio", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memcached – metadata cache (querier accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 41, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 42, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - 
], - "title": "Error rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 43, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - 
"label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 44, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": 
"short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Blocks object store (store-gateway accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 45, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - 
"legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Get", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 46, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval]))", - "format": 
"time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: GetRange", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 47, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval]))", - 
"format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Upload", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 48, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 49, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 50, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"querier\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Error rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 51, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 52, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Blocks object store (querier accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 53, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Get", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 54, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: GetRange", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 55, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Upload", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 56, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - 
"regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Reads", - "uid": "e327503188913dc38ad571c647eef643", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-reads.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-remote-ruler-reads-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": 
"dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": 
"custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"ruler-query-frontend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend (dedicated to ruler)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"} / 
container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler (dedicated to ruler)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": 
"request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": 
"desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier (dedicated to ruler)", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - 
}, - "timezone": "utc", - "title": "Mimir / Remote ruler reads resources", - "uid": "1940f6ef765a506a171faa2056c956c3", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-remote-ruler-reads-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-remote-ruler-reads.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "175px", - "panels": [ - { - "content": "

\n This dashboard shows health metrics for the ruler read path when remote operational mode is enabled.\n It is broken into sections for each service on the ruler read path, and organized by the order in which the read request flows.\n
\n For each service, there are three panels showing (1) requests per second to that service, (2) average, median, and p99 latency of requests to that service, and (3) p99 latency of requests to each instance of that service.\n

\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 12, - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Remote ruler reads dashboard description", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "100px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Evaluations per second\nRate of rule expressions evaluated per second.\n\n", - "fill": 1, - "format": "reqps", - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(\n rate(\n cortex_request_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\",\n route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"\n }[$__rate_interval]\n )\n)\n", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Evaluations / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": 
false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": 
"color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th 
percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend (dedicated to ruler)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Requests / sec\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 6, - "links": [ ], - "options": { - 
"legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Latency (Time in Queue)\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (Time in Queue)", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Queue length\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "queries" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "sum(min_over_time(cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__interval]))", - "format": "time_series", - "legendFormat": "Queue length", - "legendLink": null - } - ], - "title": "Queue length", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler (dedicated to ruler)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### 99th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "99th Percentile: {{ additional_queue_dimensions }}", - "refId": "A" - } - ], - "title": "99th Percentile Latency by Queue Dimension", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### 50th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "50th Percentile: {{ additional_queue_dimensions }}", - "refId": "A" - } - ], - "title": "50th Percentile Latency by Queue Dimension", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Average Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (additional_queue_dimensions) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (additional_queue_dimensions), \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "Average: {{ additional_queue_dimensions }}", - "refId": "C" - } - ], - "title": "Average Latency by Queue Dimension", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler Latency (Time in Queue) Breakout by Additional Queue Dimensions", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - 
"value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", 
\"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_querier_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / sum(cluster_job_route:cortex_querier_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - 
"fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_querier_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier (dedicated to ruler)", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - 
"datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Remote ruler reads", - "uid": "f103238f7f5ab2f1345ce650cbfbfe2f", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-remote-ruler-reads.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-rollout-progress.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "fillOpacity": 80, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineWidth": 1, - "scaleDistribution": { - "type": "linear" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": 
[ ], - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Ready" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Updated" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "blue", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 13, - "w": 10, - "x": 0, - "y": 0 - }, - "id": 1, - "links": [ ], - "options": { - "barRadius": 0, - "barWidth": 0.96999999999999997, - "fullHighlight": false, - "groupWidth": 0.69999999999999996, - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "orientation": "horizontal", - "showValue": "auto", - "stacking": "none", - "tooltip": { - "mode": "multi", - "sort": "none" - }, - "xField": "Workload", - "xTickLabelRotation": 0, - "xTickLabelSpacing": 0 - }, - "targets": [ - { - "expr": "(\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas_updated{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas_updated{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n /\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n) and (\n sum by (workload) (\n label_replace(label_replace(label_replace(\n 
kube_deployment_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n > 0\n)\n", - "format": "table", - "instant": true, - "intervalFactor": null, - "legendFormat": "__auto", - "legendLink": null, - "step": null - }, - { - "expr": "(\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas_ready{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas_ready{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n /\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n) and (\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n > 0\n)\n", - "format": "table", - "instant": true, - "intervalFactor": null, - "legendFormat": "__auto", - "legendLink": null, - "step": null - } - ], - "title": "Rollout progress", - "transformations": [ - { - "id": "joinByField", 
- "options": { - "byField": "workload", - "mode": "outer" - } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time 1": true, - "Time 2": true - }, - "renameByName": { - "Value #A": "Updated", - "Value #B": "Ready", - "workload": "Workload" - } - } - }, - { - "id": "sortBy", - "options": { - "sort": [ - { - "field": "Workload" - } - ] - } - } - ], - "type": "barchart" - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 10, - "y": 0 - }, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Writes - 2xx", - "tooltip": { - 
"shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 0.20000000000000001 - }, - { - "color": "red", - "value": 0.5 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 12, - "y": 0 - }, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - 
"step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Writes - 4xx", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 0.01 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 14, - "y": 0 - }, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - 
"intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Writes - 5xx", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 0.20000000000000001 - }, - { - "color": "red", - "value": 0.5 - } - ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 8, - "x": 16, - "y": 0 - }, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - 
"step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Writes 99th latency", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 10, - "y": 4 - }, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": 
"", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Reads - 2xx", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 0.01 - }, - { - "color": "red", - "value": 0.050000000000000003 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 12, - "y": 4 - }, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", 
route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Reads - 4xx", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 0.01 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 14, - "y": 4 - }, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Reads - 5xx", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 1 - }, - { - "color": "red", - "value": 2.5 - } - ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 8, - "x": 16, - "y": 4 - }, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", 
route=~\"(prometheus|api_prom)_api_v1_.+\"}))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Reads 99th latency", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 0, - "noValue": "All healthy", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 1 - }, - { - "color": "red", - "value": 2 - } - ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 3, - "w": 10, - "x": 0, - "y": 13 - }, - "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "text": { - "titleSize": 14, - "valueSize": 14 - }, - "textMode": "value_and_name" - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "kube_deployment_status_replicas_unavailable{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n> 0\n", - "format": null, - "instant": true, - "interval": "", - 
"intervalFactor": null, - "legendFormat": "{{deployment}}", - "legendLink": null, - "step": null - }, - { - "expr": "kube_statefulset_status_replicas_current{cluster=~\"$cluster\", namespace=~\"$namespace\"} -\nkube_statefulset_status_replicas_ready {cluster=~\"$cluster\", namespace=~\"$namespace\"}\n> 0\n", - "format": null, - "instant": true, - "interval": "", - "intervalFactor": null, - "legendFormat": "{{statefulset}}", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Unhealthy pods", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "r.*" - }, - "properties": [ - { - "id": "custom.align", - "value": "center" - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 6, - "x": 10, - "y": 8 - }, - "id": 11, - "targets": [ - { - "expr": "count by(container, version) (\n label_replace(\n kube_pod_container_info{cluster=~\"$cluster\", namespace=~\"$namespace\"},\n \"version\", \"$1\", \"image\", \".*:(.*)\"\n )\n)\n", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "title": "Pods count per version", - "transformations": [ - { - "id": "labelsToFields", - "options": { - "valueLabel": "version" - } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true - }, - "indexByName": { - "Time": 0, - "container": 1 - } - } - }, - { - "id": "sortBy", - "options": { - "fields": { }, - "sort": [ - { - "field": "container" - } - ] - } - } - ], - "type": "table" - }, - { - 
"datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 10 - }, - "unit": "percentunit" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 8 - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}))[1h:])\n)\n", - "format": "time_series", - "legendFormat": "writes", - "legendLink": null - }, - { - "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}))[1h:])\n)\n", - "format": "time_series", - "legendFormat": "reads", - "legendLink": null - } - ], - "title": "Latency vs 24h ago", - "type": "timeseries" - } - ], - "refresh": "10s", - "rows": null, - "schemaVersion": 27, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - 
"options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Rollout progress", - "uid": "7f0b5567d543a1698e695b530eb7f5de", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-rollout-progress.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-ruler.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ 
- "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "100px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cortex_ruler_managers_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Active configurations", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - 
"seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Total rules", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Read from ingesters - QPS\nNote: Even while operating in Remote ruler mode you will still see values for this panel.\n\nThis is because the metrics are inclusive of intermediate services and are showing the requests that ultimately reach the ingesters.\n\nFor a more detailed view of the read path when using remote ruler mode, see the Remote ruler reads dashboard.\n\n", - "fill": 1, - "format": "reqps", - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Read from ingesters - QPS", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "reqps", - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Write to ingesters - QPS", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - 
"label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "success", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - }, - 
{ - "expr": "sum(rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "missed", - "legendLink": null - } - ], - "title": "Evaluations per second", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum (rate(cortex_prometheus_rule_evaluation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum (rate(cortex_prometheus_rule_evaluation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "average", - "legendLink": null - } - ], - "title": "Latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Rule evaluations global", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - 
"matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", 
operation=\"/cortex.Ingester/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "QPS", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", 
operation=\"/cortex.Ingester/Push\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Writes (ingesters)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { 
- "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "QPS", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, 
sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Reads (ingesters)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - 
"mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n 
label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ruler - key-value store for rulers ring", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(cortex_querier_storegateway_instances_hit_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_instances_hit_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Number of store-gateways hit per query", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(cortex_querier_storegateway_refetches_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_refetches_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Refetches of missing blocks per query", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Consistency checks failed\nRate of queries that had to run with consistency checks and those checks failed. A failed consistency check means that some of at least one block which had to be queried wasn't present in any of the store-gateways.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Failures / sec" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_querier_blocks_consistency_checks_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) / 
sum(rate(cortex_querier_blocks_consistency_checks_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Failures / sec", - "legendLink": null - } - ], - "title": "Consistency checks failed", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ruler - blocks storage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(user) (rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum by(user) (rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]) > 0)\n> 0\n", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Delivery errors", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": 
"percentunit" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(user) (rate(cortex_prometheus_notifications_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum by(user) (rate(cortex_prometheus_notifications_queue_capacity{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0\n", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Queue length", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (user) (increase(cortex_prometheus_notifications_dropped_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0\n", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Dropped", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Notifications", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - 
"pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(user) (rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Missed iterations", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "rate(cortex_prometheus_rule_group_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])\n /\nrate(cortex_prometheus_rule_group_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])\n", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": 
false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(rule_group) (rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "{{ rule_group }}", - "legendLink": null - } - ], - "title": "Failures", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Group evaluations", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "sum by(user) (rate(cortex_prometheus_rule_evaluation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum by(user) (rate(cortex_prometheus_rule_evaluation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Latency", - "type": "timeseries" - } - ], - 
"repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Rule evaluation per user", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Error rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - 
"custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - 
"fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 26, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - 
"repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ruler configuration object store (ruler accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 27, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Get", - "type": "timeseries", - "yaxes": [ - { - 
"format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 28, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: GetRange", - "type": 
"timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 29, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Upload", - 
"type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 30, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: 
Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Ruler", - 
"uid": "631e15d5d85afb2ca8e35d62984eeaa0", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-ruler.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-scaling.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "200px", - "panels": [ - { - "id": 1, - "options": { - "content": "This dashboard identifies scaling-related issues by suggesting services that you might want to scale up.\nThe table that follows contains a suggested number of replicas and the reason why.\nIf the system is failing and depending on the reason, try scaling up to the specified number.\nThe specified numbers are intended as helpful guidelines when things go wrong, rather than prescriptive guidelines.\n\nReasons:\n- **sample_rate**: There are not enough replicas to handle the\n sample rate. Applies to distributor and ingesters.\n- **active_series**: There are not enough replicas\n to handle the number of active series. Applies to ingesters.\n- **cpu_usage**: There are not enough replicas\n based on the CPU usage of the jobs vs the resource requests.\n Applies to all jobs.\n- **memory_usage**: There are not enough replicas based on the memory\n usage vs the resource requests. 
Applies to all jobs.\n- **active_series_limits**: There are not enough replicas to hold 60% of the\n sum of all the per tenant series limits.\n- **sample_rate_limits**: There are not enough replicas to handle 60% of the\n sum of all the per tenant rate limits.\n", - "mode": "markdown" - }, - "span": 12, - "title": "", - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Service scaling", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "400px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 0, - "desc": false - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "Required Replicas", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "Cluster", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "__name__", - "thresholds": [ ], - "type": "hidden", - "unit": "short" - }, - { - "alias": "Cluster", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - 
"link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "cluster", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "Service", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "deployment", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "Namespace", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "namespace", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "Reason", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "reason", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "sort_desc(\n cluster_namespace_deployment_reason:required_replicas:count{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n > ignoring(reason) group_left\n cluster_namespace_deployment:actual_replicas:count{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\n", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Workload-based scaling", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - 
"yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Scaling", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Scaling", - "uid": 
"64bbad83507b7289b514725658e10352", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-scaling.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-slow-queries.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | 
user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Response time", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Fetched series", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - 
"unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Fetched chunks", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": 
"quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Response size", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Time span", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "description": "### Query wall time\nSeconds per second spent by queriers evaluating queries.\nThis is roughly the product of the number of 
subqueries for a query and how long they took.\nIn increase in this metric means that queries take more resources from the query path to evaluate.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap query_wall_time_seconds [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap query_wall_time_seconds [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Query wall time", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Accross tenants", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, 
- "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 response time", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 fetched series", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, 
- "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 fetched chunks", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 response size", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - 
"pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 time span", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "description": "### Query wall time\nSeconds per second spent by queriers evaluating queries.\nThis is roughly the product of the number of subqueries for a query and how long they took.\nIn increase in this metric means that queries take more resources from the query path to evaluate.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | 
unwrap query_wall_time_seconds [$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 query wall time", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Top 10 tenants", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 response time", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, 
- "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 fetched series", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 fetched chunks", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { 
- "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 response size", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 time span", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "description": "### Query wall time\nSeconds per second spent by queriers evaluating queries.\nThis is roughly the product of the number of subqueries for a query and how long they took.\nIn increase in this metric means that queries take more resources from the query path to evaluate.\n\n", - "fieldConfig": { - 
"defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap query_wall_time_seconds [$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 query wall time", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Top 10 User-Agents", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "fetched_chunk_bytes" - }, - "properties": [ - { - "id": "unit", - "value": "bytes" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "fetched_index_bytes" - }, - "properties": [ - { - "id": "unit", - "value": "bytes" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "response_size_bytes" - }, - "properties": [ - { - "id": "unit", - "value": "bytes" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "results_cache_hit_bytes" - }, - "properties": [ - { - "id": "unit", - "value": "bytes" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "results_cache_miss_bytes" - }, - "properties": [ - { - "id": "unit", - "value": 
"bytes" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "estimated_series_count" - }, - "properties": [ - { - "id": "unit", - "value": "short" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "fetched_chunks_count" - }, - "properties": [ - { - "id": "unit", - "value": "short" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "fetched_series_count" - }, - "properties": [ - { - "id": "unit", - "value": "short" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Time span" - }, - "properties": [ - { - "id": "unit", - "value": "s" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Duration" - }, - "properties": [ - { - "id": "unit", - "value": "s" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Step" - }, - "properties": [ - { - "id": "unit", - "value": "s" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "queue_time_seconds" - }, - "properties": [ - { - "id": "unit", - "value": "s" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "query_wall_time_seconds" - }, - "properties": [ - { - "id": "unit", - "value": "s" - } - ] - } - ] - }, - "height": "500px", - "id": 19, - "span": 12, - "targets": [ - { - "expr": "{cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | label_format response_time_seconds=\"{{ if .response_time }} {{ duration .response_time }} {{ end }}\",param_step_seconds=\"{{ if .param_step }} {{ div .param_step 1000 }} {{ end }}\",length_seconds=\"{{ if .length }} {{ duration .length }} {{ end }}\"", - "instant": false, - "legendFormat": "", - "range": true, - "refId": "A" - } - ], - "title": "Slow queries", - "transformations": [ - { - "id": "extractFields", - "options": { - "source": "labels" - } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Line": true, - "Time": 
true, - "caller": true, - "cluster": true, - "component": true, - "container": true, - "gossip_ring_member": true, - "host": true, - "id": true, - "job": true, - "labels": true, - "length": true, - "level": true, - "line": true, - "method": true, - "msg": true, - "name": true, - "namespace": true, - "param_step": true, - "path": true, - "pod": true, - "pod_template_hash": true, - "response_time": true, - "stream": true, - "traceID": true, - "tsNs": true - }, - "indexByName": { - "err": 10, - "length_seconds": 3, - "param_end": 5, - "param_query": 8, - "param_start": 4, - "param_step_seconds": 7, - "param_time": 6, - "response_time_seconds": 9, - "status": 1, - "ts": 0, - "user": 2 - }, - "renameByName": { - "err": "Error", - "length_seconds": "Time span", - "param_end": "End", - "param_query": "Query", - "param_start": "Start", - "param_step_seconds": "Step", - "param_time": "Time (instant query)", - "response_time_seconds": "Duration", - "ts": "Completion date", - "user": "Tenant ID" - } - } - }, - { - "id": "convertFieldType", - "options": { - "conversions": [ - { - "destinationType": "number", - "targetField": "sharded_queries" - }, - { - "destinationType": "number", - "targetField": "split_queries" - }, - { - "destinationType": "number", - "targetField": "fetched_chunk_bytes" - }, - { - "destinationType": "number", - "targetField": "fetched_index_bytes" - }, - { - "destinationType": "number", - "targetField": "response_size_bytes" - }, - { - "destinationType": "number", - "targetField": "results_cache_hit_bytes" - }, - { - "destinationType": "number", - "targetField": "results_cache_miss_bytes" - }, - { - "destinationType": "number", - "targetField": "estimated_series_count" - }, - { - "destinationType": "number", - "targetField": "fetched_chunks_count" - }, - { - "destinationType": "number", - "targetField": "fetched_series_count" - }, - { - "destinationType": "number", - "targetField": "Time span" - }, - { - "destinationType": "number", - "targetField": 
"Duration" - }, - { - "destinationType": "number", - "targetField": "Step" - }, - { - "destinationType": "number", - "targetField": "queue_time_seconds" - }, - { - "destinationType": "number", - "targetField": "query_wall_time_seconds" - } - ] - } - } - ], - "type": "table" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "hide": 0, - "includeAll": false, - "label": "Loki data source", - "multi": false, - "name": "loki_datasource", - "query": "loki", - "type": "datasource" - }, - { - "current": { - "selected": true, - "text": "5s", - "value": "5s" - }, - "hide": 0, - "label": "Min duration", - "name": "min_duration", - "options": [ - { - "selected": true, - "text": "5s", - "value": 
"5s" - } - ], - "query": "5s", - "type": "textbox" - }, - { - "current": { - "selected": true, - "text": ".*", - "value": ".*" - }, - "hide": 0, - "label": "Tenant ID", - "name": "tenant_id", - "options": [ - { - "selected": true, - "text": ".*", - "value": ".*" - } - ], - "query": ".*", - "type": "textbox" - }, - { - "current": { - "selected": true, - "text": ".*", - "value": ".*" - }, - "hide": 0, - "label": "User-Agent HTTP Header", - "name": "user_agent", - "options": [ - { - "selected": true, - "text": ".*", - "value": ".*" - } - ], - "query": ".*", - "type": "textbox" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Slow queries", - "uid": "6089e1ce1e678788f46312a0a1e647e6", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-slow-queries.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-tenants.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "25px", - "panels": [ - { - "content": "

\n This dashboard shows various metrics detailed by tenant (user) selected above.\n

\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 12, - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Tenants dashboard description", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### All series\nNumber of active, in-memory, and owned series per user, and active series matching custom trackers (in parenthesis).\nNote that these counts include all series regardless of the type of data (counter, gauge, native histogram, etc.).\nNote that active series matching custom trackers are included in the total active series count.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(\n (\n cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n - cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "in-memory", - "legendLink": null - }, - { - "expr": "max(cortex_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"max_global_series_per_user\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"max_global_series_per_user\"})\n", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "sum(\n cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "active", - "legendLink": null - }, - { - "expr": "sum(\n cortex_ingester_owned_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "owned", - "legendLink": null - }, - { - "expr": "sum by (name) (\n cortex_ingester_active_series_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n) > 0\n", - "format": "time_series", - "legendFormat": "active ({{ name }})", - "legendLink": null - } - ], - "title": "All series", - 
"type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### In-memory series per ingester\nLocal tenant series limit and number of in-memory series per ingester.\nBecause series can be unevenly distributed across ingesters, ingesters may hit the local limit at different times.\nNote that in-memory series may exceed the local limit if limiting based on owned series is enabled.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/local limit .+/" - }, - "properties": [ - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - }, - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "min by (job) (cortex_ingester_local_limits{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", limit=\"max_global_series_per_user\", user=\"$user\"})\n", - "format": "time_series", - "legendFormat": "local limit ({{job}})", - "legendLink": null - }, - { - "expr": "cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n- cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "In-memory series per ingester", - "type": "timeseries" - }, - { - "datasource": 
"$datasource", - "description": "### Owned series per ingester\nLocal tenant series limit and number of owned series per ingester.\nBecause series can be unevenly distributed across ingesters, ingesters may hit the local limit at different times.\nOwned series are the subset of an ingester's in-memory series that currently map to it in the ring\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/local limit .+/" - }, - "properties": [ - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - }, - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "min by (job) (cortex_ingester_local_limits{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", limit=\"max_global_series_per_user\", user=\"$user\"})\n", - "format": "time_series", - "legendFormat": "local limit ({{job}})", - "legendLink": null - }, - { - "expr": "cortex_ingester_owned_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Owned series per ingester", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Tenant series counts", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - 
"description": "### Series with exemplars\nNumber of series with exemplars currently in storage.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "series", - "legendLink": null - } - ], - "title": "Series with exemplars", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Oldest exemplar age\nThe age of the oldest exemplar stored in circular storage.\nUseful to check for what time range the current exemplar buffer limit allows.\nThis usually means the max age for all exemplars for a typical setup.\nThis is not true though if one of the series timestamp is in future compared to rest series.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - 
"tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "time() - min(cortex_ingester_tsdb_exemplar_last_exemplars_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"} > 0)", - "format": "time_series", - "legendFormat": "age", - "legendLink": null - } - ], - "title": "Oldest exemplar age", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Native histogram series\nNumber of active native histogram series per user, and active native histogram series matching custom trackers (in parenthesis).\nNote that active series matching custom trackers are included in the total active series count.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n cortex_ingester_active_native_histogram_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "active", - "legendLink": null - }, - { - "expr": "sum by (name) (\n 
cortex_ingester_active_native_histogram_series_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n) > 0\n", - "format": "time_series", - "legendFormat": "active ({{ name }})", - "legendLink": null - } - ], - "title": "Native histogram series", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Total number of buckets used by native histogram series\nTotal number of buckets in active native histogram series per user, and total active native histogram buckets matching custom trackers (in parenthesis).\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n cortex_ingester_active_native_histogram_buckets{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "buckets", - 
"legendLink": null - }, - { - "expr": "sum by (name) (\n cortex_ingester_active_native_histogram_buckets_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n) > 0\n", - "format": "time_series", - "legendFormat": "buckets ({{ name }})", - "legendLink": null - } - ], - "title": "Total number of buckets used by native histogram series", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Exemplars and native histograms", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Distributor requests incoming rate\nThe rate of requests that have come in to the distributor, including rejected requests.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_requests_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Distributor requests incoming rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor requests 
received (accepted) rate\nThe rate of received requests, excluding rejected requests.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_received_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - }, - { - "expr": "max(cortex_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"request_rate\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"request_rate\"})\n", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "Distributor requests received (accepted) rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Newest seen sample age\nThe age of the newest received sample seen in the distributors.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - 
"thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "time() - max(cortex_distributor_latest_seen_sample_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"} > 0)", - "format": "time_series", - "legendFormat": "age", - "legendLink": null - } - ], - "title": "Newest seen sample age", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor discarded requests rate\nThe rate of each request's discarding reason.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_discarded_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{ reason }}", - "legendLink": null - } - ], - "title": "Distributor discarded requests rate", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor ingestion requests", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Distributor samples incoming rate\nThe rate of samples that 
have come in to the distributor, including rejected or deduped exemplars.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_samples_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Distributor samples incoming rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor samples received (accepted) rate\nThe rate of received samples, excluding rejected and deduped samples.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - }, - { - "expr": "max(cortex_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"ingestion_rate\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"ingestion_rate\"})\n", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "Distributor samples received (accepted) rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor deduplicated/non-HA\nThe rate of deduplicated samples and the rate of received samples for a user that has HA tracking turned on, but the sample didn't contain both HA labels.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_deduped_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "deduplicated", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_distributor_non_ha_samples_received_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "non-HA", - 
"legendLink": null - } - ], - "title": "Distributor deduplicated/non-HA", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor and ingester discarded samples rate\nThe rate of each sample's discarding reason.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{ reason }} (distributor)", - "legendLink": null - }, - { - "expr": "sum by (reason) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{ reason }} (ingester)", - "legendLink": null - } - ], - "title": "Distributor and ingester discarded samples rate", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Samples ingestion funnel", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Distributor exemplars incoming rate\nThe rate of exemplars that have come in to the distributor, including rejected or deduped exemplars.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - 
"pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_exemplars_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Distributor exemplars incoming rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor exemplars received (accepted) rate\nThe rate of received exemplars, excluding rejected and deduped exemplars.\nThis number can be sensibly lower than incoming rate because we dedupe the HA sent exemplars, and then reject based on time.\nSee discarded rate for reasons why exemplars are being discarded.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_received_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Distributor exemplars received 
(accepted) rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor discarded exemplars rate\nThe rate of each exmplars' discarding reason.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_discarded_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{ reason }}", - "legendLink": null - } - ], - "title": "Distributor discarded exemplars rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Ingester appended exemplars rate\nTotal number of exemplars appended in the ingesters.\nThis can be lower than ingested exemplars rate since TSDB does not append the same exemplar twice, and those can be frequent.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n rate(cortex_ingester_tsdb_exemplar_exemplars_appended_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval])\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Ingester appended exemplars rate", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Exemplars ingestion funnel", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Symbol table size for loaded blocks\nSize of symbol table in memory for loaded blocks, averaged by ingester.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (job) (cortex_ingester_tsdb_symbol_table_size_bytes{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "{{ job }}", - "legendLink": null - } - ], - "title": "Symbol table size for loaded blocks", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Space used by local blocks\nThe number of bytes that are currently used for local storage by all blocks.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 
1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (job) (cortex_ingester_tsdb_storage_blocks_bytes{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "{{ job }}", - "legendLink": null - } - ], - "title": "Space used by local blocks", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingesters' storage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Number of groups\nTotal number of rule groups for a tenant.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "count(sum by (rule_group) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", 
user=\"$user\"}))", - "format": "time_series", - "legendFormat": "groups", - "legendLink": null - }, - { - "expr": "max(cortex_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"ruler_max_rule_groups_per_tenant\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"ruler_max_rule_groups_per_tenant\"})\n", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "Number of groups", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Number of rules\nTotal number of rules for a tenant.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "rules", - "legendLink": null - } - ], - "title": "Number of rules", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "options": { - "legend": { - 
"showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Total evaluations rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 26, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (rule_group) (rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "{{ rule_group }}", - "legendLink": null - } - ], - "title": "Failed evaluations rate", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Rules", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 27, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - 
"points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "rules", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (rule_group) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit biggest groups", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 28, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - 
"nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "seconds", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (rule_group) (cortex_prometheus_rule_group_last_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit slowest groups (last evaluation)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Top rules", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": 
"$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 29, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Sent notifications rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "rate" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 30, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Failed notifications rate", - "type": "timeseries" - } - ], - "repeat": null, - 
"repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Notifications", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 31, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (user) (cortex_alertmanager_alerts{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "alerts", - "legendLink": null - }, - { - "expr": "sum by (user) (cortex_alertmanager_silences{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "silences", - "legendLink": null - } - ], - "title": "Alerts", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": 
"#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 32, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "(\nsum(rate(cortex_alertmanager_notifications_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))\n-\non() (sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) or on () vector(0))\n) > 0\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "NPS", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 33, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "(\nsum(rate(cortex_alertmanager_notifications_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration)\n-\n(sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", 
user=\"$user\"}[$__rate_interval])) by(integration) or\n (sum(rate(cortex_alertmanager_notifications_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration) * 0)\n)) > 0\n", - "format": "time_series", - "legendFormat": "success - {{ integration }}", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration)", - "format": "time_series", - "legendFormat": "failed - {{ integration }}", - "legendLink": null - } - ], - "title": "NPS by integration", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alertmanager", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 34, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_query_frontend_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Queries / Sec", - "legendLink": null - } - ], - "title": "Rate of Read Requests - query-frontend", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - 
"fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 35, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "Queue Length", - "legendLink": null - } - ], - "title": "Number of Queries Queued - query-scheduler", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Read Path - Queries (User)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 36, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_query_frontend_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Queries / Sec", - "legendLink": null - } - ], - "title": "Rate of Read Requests - ruler-query-frontend", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - 
"drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 37, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "Queue Length", - "legendLink": null - } - ], - "title": "Number of Queries Queued - ruler-query-scheduler", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Read Path - Queries (Ruler)", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Estimated Compaction Jobs\nEstimated number of compaction jobs for selected user, based on latest version of bucket index. When user sends data, ingesters upload new user blocks every 2 hours\n(shortly after 01:00 UTC, 03:00 UTC, 05:00 UTC, etc.), and compactors should process all of the blocks within 2h interval.\nIf this graph regularly goes to zero (or close to zero) in 2 hour intervals, then compaction for this user works correctly.\n\nDepending on the configuration, there are two types of jobs: `split` jobs and `merge` jobs. 
Split jobs will only show up when user is configured with positive number of `compactor_split_and_merge_shards`.\nValues for split and merge jobs are stacked.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 50, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 38, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (type) (cortex_bucket_index_estimated_compaction_jobs{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", user=\"$user\"})\nand ignoring(type)\n(sum(rate(cortex_bucket_index_estimated_compaction_jobs_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) == 0)\n", - "format": "time_series", - "legendFormat": "{{ job }}", - "legendLink": null - } - ], - "title": "Estimated Compaction Jobs", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Number of blocks\nNumber of blocks stored in long-term storage for this user.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 39, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by (user) (cortex_bucket_blocks_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", user=\"$user\"})\n", - "format": "time_series", - "legendFormat": "{{ job }}", - "legendLink": null - } - ], - "title": "Blocks", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Compactions", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "user", - "multi": false, - "name": "user", - "options": [ ], - "query": "label_values(cortex_ingester_active_series{cluster=~\"$cluster\", namespace=~\"$namespace\"}, user)", - "refresh": 1, - "regex": "", - "sort": 
1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "current": { - "selected": true, - "text": "10", - "value": "10" - }, - "hide": 0, - "includeAll": false, - "multi": false, - "name": "limit", - "options": [ - { - "selected": true, - "text": "10", - "value": "10" - }, - { - "selected": false, - "text": "50", - "value": "50" - }, - { - "selected": false, - "text": "100", - "value": "100" - }, - { - "selected": false, - "text": "500", - "value": "500" - }, - { - "selected": false, - "text": "1000", - "value": "1000" - } - ], - "type": "custom" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Tenants", - "uid": "35fa247ce651ba189debf33d7ae41611", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-tenants.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-top-tenants.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "25px", - "panels": [ - { - "content": "

\n This dashboard shows the top tenants based on multiple selection criterias.\n Rows are collapsed by default to avoid querying all of them.\n Use the templating variable \"limit\" above to select the amount of users to be shown.\n

\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 12, - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Top tenants dashboard description", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "series", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit,\n sum by (user) (\n cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n )\n)\n", - 
"format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by active series", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By active series", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "series", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - 
"pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} )\n)\n)", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by in-memory series (series created - series removed)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By in-memory series", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - 
}, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} )\n)\n\nand\ntopk($limit, sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ end())\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ end())\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} @ end())\n)\n - sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ start())\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ start())\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} @ start())\n)\n)\n", - 
"format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Top $limit users by in-memory series (series created - series removed) that grew the most between query range start and query range end", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By in-memory series growth", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "samples/s", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[5m])))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - 
"thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by received samples rate in last 5m", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By samples rate", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\nand\ntopk($limit,\n sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ end()))\n -\n sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ start()))\n)\n", - "format": "time_series", - "legendFormat": "{{ user }}", 
- "legendLink": null - } - ], - "title": "Top $limit users by received samples rate that grew the most between query range start and query range end", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By samples rate growth", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "samples/s", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[5m])))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - 
"title": "Top $limit users by discarded samples rate in last 5m", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By discarded samples rate", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\nand\ntopk($limit,\n sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ end()))\n -\n sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ start()))\n)\n", - "format": "time_series", - 
"legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Top $limit users by discarded samples rate that grew the most between query range start and query range end", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By discarded samples rate growth", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "series", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit,\n sum by (user) (\n cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n )\n)\n", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by series with exemplars", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By series with exemplars", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "exemplars/s", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": 
"", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (user) (rate(cortex_distributor_received_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[5m])))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by received exemplars rate in last 5m", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By exemplars rate", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 11, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 3, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - 
{ - "alias": "rules", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (rule_group, user) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit biggest groups", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By rule group size", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 12, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": 
"flot", - "seriesOverrides": [ ], - "sort": { - "col": 3, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "seconds", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (rule_group, user) (cortex_prometheus_rule_group_last_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit slowest groups (last evaluation)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By rule group evaluation time", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - 
"fill": 1, - "id": 13, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "Compaction Jobs", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit,\n sum by (user) (cortex_bucket_index_estimated_compaction_jobs{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"})\n and ignoring(user)\n (sum(rate(cortex_bucket_index_estimated_compaction_jobs_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) == 0)\n)\n", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by estimated compaction jobs from bucket-index", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - 
"format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By estimated compaction jobs from bucket-index", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "current": { - "selected": true, - "text": "10", - "value": "10" - }, - "hide": 0, - "includeAll": false, - "multi": false, - "name": "limit", - "options": [ - { - "selected": true, - "text": "10", - "value": "10" - }, - { - "selected": false, - "text": "50", - "value": "50" - }, - { - "selected": false, - "text": "100", - "value": "100" - } - 
], - "type": "custom" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Top tenants", - "uid": "bc6e12d4fe540e4a1785b9d3ca0ffdd9", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-top-tenants.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-writes-networking.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - 
"expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Summary", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - 
"drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, 
- "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"})", - "format": "time_series", - "legendFormat": 
"limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": 
null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum 
by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", 
- "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Writes networking", - "uid": "978c1cb452585c96697a238eaac7fe2d", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-writes-networking.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-writes-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { 
- "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Summary", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - 
"fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", 
- "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - 
"spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (cortex_ingester_memory_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "In-memory series", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - 
"thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - 
"drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (RSS)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, 
- "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": 
"absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"ingester\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - 
"lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"ingester\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk reads", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(ingester).*\"\n 
}\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Writes resources", - "uid": "bc9160e50b52e89e0e49c840fea3d379", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - 
grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-writes-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-writes.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "125px", - "panels": [ - { - "content": "

\n This dashboard shows various health metrics for the write path.\n It is broken into sections for each service on the write path,\n and organized by the order in which the write request flows.\n
\n Incoming metrics data travels from the gateway → distributor → ingester.\n
\n For each service, there are 3 panels showing\n (1) requests per second to that service,\n (2) average, median, and p99 latency of requests to that service, and\n (3) p99 latency of requests to each instance of that service.\n

\n

\n It also includes metrics for the key-value (KV) stores used to manage\n the high-availability tracker and the ingesters.\n

\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 12, - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Writes dashboard description", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "100px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_samples:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Samples / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Exemplars / sec\nThe total number of received exemplars by the distributors, excluding rejected and deduped exemplars, 
but not necessarily ingested by the ingesters.\n\n", - "fill": 1, - "format": "short", - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Exemplars / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### In-memory series\nThe number of series not yet flushed to object storage that are held in ingester memory.\n\n", - "fill": 1, - "format": "short", - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - 
"steppedLine": false, - "targets": [ - { - "expr": "sum(cortex_ingester_memory_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n/ on(cluster, namespace) group_left\nmax by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}))\n", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "In-memory series", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Exemplars in ingesters\nNumber of TSDB exemplars currently in ingesters' storage.\n\n", - "fill": 1, - "format": "short", - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cortex_ingester_tsdb_exemplar_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n/ on(cluster, namespace) group_left\nmax by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}))\n", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Exemplars in ingesters", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "count(count by(user) (cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Tenants", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 
null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Requests / sec\nThe rate of successful, failed and rejected requests to distributor.\nRejected requests are requests that distributor fails to handle because of distributor instance limits.\nWhen distributor is configured to use \"early\" request rejection, then rejected requests are NOT included in other metrics.\nWhen distributor is not configured to use \"early\" request rejection, then rejected requests are also counted as \"errors\".\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": 
"byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 
4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": 
"desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Requests / sec\nThe rate of successful, failed and rejected requests to ingester.\nRejected requests are requests that ingester fails to handle because of ingester instance limits (ingester-max-inflight-push-requests, ingester-max-inflight-push-requests-bytes, ingester-max-ingestion-rate).\nWhen ingester is configured to use \"early\" request rejection, then rejected requests are NOT included in other metrics.\nWhen ingester is not configured to use \"early\" request rejection, then rejected requests are also counted as \"errors\".\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } 
- } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",route=\"/cortex.Ingester/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - 
"defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - 
} - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } 
- } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 14, - 
"links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor - key-value store for high-availability (HA) deduplication", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - 
"drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": 
"single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor - key-value store for distributors ring", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - 
"mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 18, - 
"links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester - key-value store for the ingesters ring", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Uploaded blocks / sec\nThe rate of blocks being uploaded from the ingesters\nto object storage.\n\n", - 
"fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_shipper_uploads_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) - sum(rate(cortex_ingester_shipper_upload_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_ingester_shipper_upload_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Uploaded blocks / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Upload latency\nThe average, median (50th percentile), and 99th percentile time\nthe ingesters take to upload blocks to object storage.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - 
"spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Upload latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - 
"repeatRowId": null, - "showTitle": true, - "title": "Ingester - shipper", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Compactions per second\nIngesters maintain a local TSDB per-tenant on disk. Each TSDB maintains a head block for each\nactive time series; these blocks get periodically compacted (by default, every 2h).\nThis panel shows the rate of compaction operations across all TSDBs on all ingesters.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_tsdb_compactions_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_ingester_tsdb_compactions_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Compactions / sec", - "type": "timeseries" - }, - { - 
"datasource": "$datasource", - "description": "### Compaction latency\nThe average, median (50th percentile), and 99th percentile time ingesters take to compact TSDB head blocks\non the local filesystem.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Compactions latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - 
"show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester - TSDB head", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### WAL truncations per second\nThe WAL is truncated each time a new TSDB block is written. This panel measures the rate of\ntruncations.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_tsdb_wal_truncations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) - sum(rate(cortex_ingester_tsdb_wal_truncations_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_ingester_tsdb_wal_truncations_failed_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "WAL truncations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Checkpoints created per second\nCheckpoints are created as part of the WAL truncation process.\nThis metric measures the rate of checkpoint creation.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_tsdb_checkpoint_creations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) - sum(rate(cortex_ingester_tsdb_checkpoint_creations_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_ingester_tsdb_checkpoint_creations_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - 
"legendFormat": "failed", - "legendLink": null - } - ], - "title": "Checkpoints created / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### WAL truncations latency (including checkpointing)\nAverage time taken to perform a full WAL truncation,\nincluding the time taken for the checkpointing to complete.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_tsdb_wal_truncate_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\n/\nsum(rate(cortex_ingester_tsdb_wal_truncate_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) >= 0\n", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - } - ], - "title": "WAL truncations latency (includes checkpointing)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "WAL" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } 
- } - ] - }, - { - "matcher": { - "id": "byName", - "options": "mmap-ed chunks" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E28A42", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 26, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_tsdb_wal_corruptions_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "WAL", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_ingester_tsdb_mmap_chunk_corruptions_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "mmap-ed chunks", - "legendLink": null - } - ], - "title": "Corruptions / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester - TSDB write ahead log (WAL)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Distributor exemplars incoming rate\nThe rate of exemplars that have come in to the distributor, including rejected or deduped exemplars.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ex/s" - }, - "overrides": [ ] - }, - "id": 27, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": 
"sum(cluster_namespace_job:cortex_distributor_exemplars_in:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "incoming exemplars", - "legendLink": null - } - ], - "title": "Distributor exemplars incoming rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor exemplars received rate\nThe rate of received exemplars, excluding rejected and deduped exemplars.\nThis number can be sensibly lower than incoming rate because we dedupe the HA sent exemplars, and then reject based on time, see `cortex_discarded_exemplars_total` for specific reasons rates.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ex/s" - }, - "overrides": [ ] - }, - "id": 28, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "received exemplars", - "legendLink": null - } - ], - "title": "Distributor exemplars received rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Ingester ingested exemplars rate\nThe rate of exemplars ingested in the ingesters.\nEvery exemplar is sent to the replication factor number of ingesters, so the sum of rates from all ingesters is divided by the replication factor.\nThis ingested exemplars rate should match the distributor's received exemplars rate.\n\n", - "fieldConfig": { - "defaults": { - 
"custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ex/s" - }, - "overrides": [ ] - }, - "id": 29, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n cluster_namespace_job:cortex_ingester_ingested_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "ingested exemplars", - "legendLink": null - } - ], - "title": "Ingester ingested exemplars rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Ingester appended exemplars rate\nThe rate of exemplars appended in the ingesters.\nThis can be lower than ingested exemplars rate since TSDB does not append the same exemplar twice, and those can be frequent.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ex/s" - }, - "overrides": [ ] - }, - "id": 30, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n cluster_namespace_job:cortex_ingester_tsdb_exemplar_exemplars_appended:rate5m{cluster=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "appended exemplars", - "legendLink": null - } - ], - "title": "Ingester appended exemplars rate", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Exemplars", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 31, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_distributor_instance_rejected_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{reason}}", - "legendLink": null - } - ], - "title": "Rejected distributor requests", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 32, - "links": [ ], - "options": { - 
"legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_ingester_instance_rejected_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{reason}}", - "legendLink": null - } - ], - "title": "Rejected ingester requests", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Instance Limits", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - 
"10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Writes", - "uid": "8280707b8f16e7b87b840fc1cc92d4c5", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-writes.json - namespace: monitoring-system ---- -apiVersion: v1 data: overrides.yaml: | overrides: @@ -43762,35 +995,143 @@ metadata: --- apiVersion: v1 data: - MIMIR_S3_SECRET_ACCESS_KEY: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= + memcached-address: bWVtY2FjaGVkLm1lbWNhY2hlZC1zeXN0ZW0uc3ZjLmNsdXN0ZXIubG9jYWw6MTEyMTE= kind: Secret metadata: - labels: - app.kubernetes.io/component: mimir - app.kubernetes.io/instance: mimir-monolithic-mode - app.kubernetes.io/managed-by: Kustomize - app.kubernetes.io/name: mimir - app.kubernetes.io/version: 2.11.0 - name: mimir-env-92ddctt858 + name: integrations-memcached namespace: monitoring-system type: Opaque --- apiVersion: v1 data: - JAEGER_AGENT_HOST: Z3JhZmFuYS1hZ2VudC5tb25pdG9yaW5nLXN5c3RlbS5zdmMuY2x1c3Rlci5sb2NhbA== - JAEGER_AGENT_PORT: NjgzMQ== - JAEGER_SAMPLER_PARAM: MQ== - JAEGER_SAMPLER_TYPE: Y29uc3Q= - JAEGER_TAGS: YXBwPXRlbXBv - TEMPO_S3_SECRET_KEY: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= + mysql-host: bXlzcWwubXlzcWwtc3lzdGVtLnN2Yy5jbHVzdGVyLmxvY2Fs + mysql-password: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= + mysql-username: bGd0bXA= kind: Secret metadata: - name: tempo-env-gk54k88t7g - namespace: tracing-system + name: integrations-mysql + namespace: monitoring-system type: Opaque --- apiVersion: v1 -kind: Service +data: + redis-addr: cmVkaXMtbWFzdGVyLnJlZGlzLXN5c3RlbS5zdmMuY2x1c3Rlci5sb2NhbDo2Mzc5 + redis-password: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= +kind: Secret +metadata: + name: integrations-redis + namespace: monitoring-system +type: Opaque +--- +apiVersion: v1 
+data: + MIMIR_S3_SECRET_ACCESS_KEY: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= +kind: Secret +metadata: + labels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/version: 2.11.0 + name: mimir-env-92ddctt858 + namespace: monitoring-system +type: Opaque +--- +apiVersion: v1 +data: + JAEGER_AGENT_HOST: Z3JhZmFuYS1hZ2VudC5tb25pdG9yaW5nLXN5c3RlbS5zdmMuY2x1c3Rlci5sb2NhbA== + JAEGER_AGENT_PORT: NjgzMQ== + JAEGER_SAMPLER_PARAM: MQ== + JAEGER_SAMPLER_TYPE: Y29uc3Q= + JAEGER_TAGS: YXBwPXRlbXBv + TEMPO_S3_SECRET_KEY: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= +kind: Secret +metadata: + name: tempo-env-gk54k88t7g + namespace: tracing-system +type: Opaque +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +spec: + internalTrafficPolicy: Cluster + ports: + - name: http-metrics + port: 80 + protocol: TCP + targetPort: 80 + - name: grpc-otlp + port: 4317 + protocol: TCP + targetPort: 4317 + - name: http-otlp + port: 4318 + protocol: TCP + targetPort: 4318 + - name: zipkin + port: 9411 + protocol: TCP + targetPort: 9411 + - name: jaeger-compact + port: 6831 + protocol: UDP + targetPort: 6831 + selector: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + type: ClusterIP +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent-cluster + namespace: monitoring-system +spec: + clusterIP: None + ports: + - name: http + port: 80 + protocol: TCP + 
targetPort: 80 + - name: grpc-otlp + port: 4317 + protocol: TCP + targetPort: 4317 + - name: http-otlp + port: 4318 + protocol: TCP + targetPort: 4318 + - name: zipkin + port: 9411 + protocol: TCP + targetPort: 9411 + - name: jaeger-compact + port: 6831 + protocol: UDP + targetPort: 6831 + selector: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + type: ClusterIP +--- +apiVersion: v1 +kind: Service metadata: labels: app.kubernetes.io/component: mimir @@ -44078,1595 +1419,148 @@ spec: updateStrategy: type: RollingUpdate --- -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: agent-flow-mixin-alerts - namespace: monitoring-system -spec: - groups: - - name: clustering - rules: - - alert: ClusterNotConverging - annotations: - message: Cluster is not converging. - expr: stddev by (cluster, namespace) (sum without (state) (cluster_node_peers)) - != 0 - for: 5m - - alert: ClusterSplitBrain - annotations: - message: Cluster nodes have entered a split brain state. - expr: | - sum without (state) (cluster_node_peers) != - on (cluster, namespace) group_left - count by (cluster, namespace) (cluster_node_info) - for: 5m - - alert: ClusterLamportClockDrift - annotations: - message: Cluster nodes' lamport clocks are not converging. - expr: stddev by (cluster, namespace) (cluster_node_lamport_time) > 4 * sqrt(count - by (cluster, namespace) (cluster_node_info)) - for: 5m - - alert: ClusterNodeUnhealthy - annotations: - message: Cluster node is reporting a health score > 0. - expr: | - cluster_node_gossip_health_score > 0 - for: 5m - - alert: ClusterLamportClockStuck - annotations: - message: Cluster nodes's lamport clocks is not progressing. 
- expr: | - sum by (cluster, namespace, instance) (rate(cluster_node_lamport_time[2m])) == 0 - and on (cluster, namespace, instance) (cluster_node_peers > 1) - for: 5m - - alert: ClusterNodeNameConflict - annotations: - message: A node tried to join the cluster with a name conflicting with an - existing peer. - expr: sum by (cluster, namespace) (rate(cluster_node_gossip_received_events_total{event="node_conflict"}[2m])) - > 0 - for: 10m - - alert: ClusterNodeStuckTerminating - annotations: - message: Cluster node stuck in Terminating state. - expr: sum by (cluster, namespace, instance) (cluster_node_peers{state="terminating"}) - > 0 - for: 5m - - alert: ClusterConfigurationDrift - annotations: - message: Cluster nodes are not using the same configuration file. - expr: | - count without (sha256) ( - max by (cluster, namespace, sha256) (agent_config_hash and on(cluster, namespace) cluster_node_info) - ) > 1 - for: 5m - - name: agent_controller - rules: - - alert: SlowComponentEvaluations - annotations: - message: Flow component evaluations are taking too long. - expr: sum by (cluster, namespace, component_id) (rate(agent_component_evaluation_slow_seconds[10m])) - > 0 - for: 15m - - alert: UnhealthyComponents - annotations: - message: Unhealthy Flow components detected. 
- expr: sum(agent_component_controller_running_components{health_type!="healthy"}) - > 0 - for: 15m ---- -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule +apiVersion: apps/v1 +kind: DaemonSet metadata: - name: mimir-mixin-alerts + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent namespace: monitoring-system spec: - groups: - - name: mimir_alerts - rules: - - alert: MimirIngesterUnhealthy - annotations: - message: Mimir cluster {{ $labels.cluster }}/{{ $labels.namespace }} has {{ - printf "%f" $value }} unhealthy ingester(s). - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterunhealthy - expr: | - min by (cluster, namespace) (cortex_ring_members{state="Unhealthy", name="ingester"}) > 0 - for: 15m - labels: - severity: critical - - alert: MimirRequestErrors - annotations: - message: | - The route {{ $labels.route }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% errors. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrequesterrors - expr: | - 100 * sum by (cluster, namespace, job, route) (rate(cortex_request_duration_seconds_count{status_code=~"5..",route!~"ready|debug_pprof"}[1m])) - / - sum by (cluster, namespace, job, route) (rate(cortex_request_duration_seconds_count{route!~"ready|debug_pprof"}[1m])) - > 1 - for: 15m - labels: - severity: critical - - alert: MimirRequestLatency - annotations: - message: | - {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrequestlatency - expr: | - cluster_namespace_job_route:cortex_request_duration_seconds:99quantile{route!~"metrics|/frontend.Frontend/Process|ready|/schedulerpb.SchedulerForFrontend/FrontendLoop|/schedulerpb.SchedulerForQuerier/QuerierLoop|debug_pprof"} - > - 2.5 - for: 15m - labels: - severity: warning - - alert: MimirQueriesIncorrect - annotations: - message: | - The Mimir cluster {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% incorrect query results. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirqueriesincorrect - expr: | - 100 * sum by (cluster, namespace) (rate(test_exporter_test_case_result_total{result="fail"}[5m])) - / - sum by (cluster, namespace) (rate(test_exporter_test_case_result_total[5m])) > 1 - for: 15m - labels: - severity: warning - - alert: MimirInconsistentRuntimeConfig - annotations: - message: | - An inconsistent runtime config file is used across cluster {{ $labels.cluster }}/{{ $labels.namespace }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirinconsistentruntimeconfig - expr: | - count(count by(cluster, namespace, job, sha256) (cortex_runtime_config_hash)) without(sha256) > 1 - for: 1h - labels: - severity: critical - - alert: MimirBadRuntimeConfig - annotations: - message: | - {{ $labels.job }} failed to reload runtime config. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirbadruntimeconfig - expr: | - # The metric value is reset to 0 on error while reloading the config at runtime. - cortex_runtime_config_last_reload_successful == 0 - for: 5m - labels: - severity: critical - - alert: MimirFrontendQueriesStuck - annotations: - message: | - There are {{ $value }} queued up queries in {{ $labels.cluster }}/{{ $labels.namespace }} {{ $labels.job }}. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirfrontendqueriesstuck - expr: | - sum by (cluster, namespace, job) (min_over_time(cortex_query_frontend_queue_length[1m])) > 0 - for: 5m - labels: - severity: critical - - alert: MimirSchedulerQueriesStuck - annotations: - message: | - There are {{ $value }} queued up queries in {{ $labels.cluster }}/{{ $labels.namespace }} {{ $labels.job }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirschedulerqueriesstuck - expr: | - sum by (cluster, namespace, job) (min_over_time(cortex_query_scheduler_queue_length[1m])) > 0 - for: 7m - labels: - severity: critical - - alert: MimirCacheRequestErrors - annotations: - message: | - The cache {{ $labels.name }} used by Mimir {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% errors for {{ $labels.operation }} operation. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircacherequesterrors - expr: | - ( - sum by(cluster, namespace, name, operation) ( - rate(thanos_memcached_operation_failures_total[1m]) - or - rate(thanos_cache_operation_failures_total[1m]) - ) - / - sum by(cluster, namespace, name, operation) ( - rate(thanos_memcached_operations_total[1m]) - or - rate(thanos_cache_operations_total[1m]) - ) - ) * 100 > 5 - for: 5m - labels: - severity: warning - - alert: MimirIngesterRestarts - annotations: - message: Mimir {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has restarted {{ printf "%.2f" $value }} times in the last 30 mins. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterrestarts - expr: | - ( - sum by(cluster, namespace, pod) ( - increase(kube_pod_container_status_restarts_total{container=~"(ingester|mimir-write)"}[30m]) - ) - >= 2 - ) - and - ( - count by(cluster, namespace, pod) (cortex_build_info) > 0 - ) - labels: - severity: warning - - alert: MimirKVStoreFailure - annotations: - message: | - Mimir {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is failing to talk to the KV store {{ $labels.kv_name }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirkvstorefailure - expr: | - ( - sum by(cluster, namespace, pod, status_code, kv_name) (rate(cortex_kv_request_duration_seconds_count{status_code!~"2.+"}[1m])) - / - sum by(cluster, namespace, pod, status_code, kv_name) (rate(cortex_kv_request_duration_seconds_count[1m])) - ) - # We want to get alerted only in case there's a constant failure. - == 1 - for: 5m - labels: - severity: critical - - alert: MimirMemoryMapAreasTooHigh - annotations: - message: '{{ $labels.job }}/{{ $labels.pod }} has a number of mmap-ed areas - close to the limit.' - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirmemorymapareastoohigh - expr: | - process_memory_map_areas{job=~".*/(ingester.*|cortex|mimir|mimir-write.*|store-gateway.*|cortex|mimir|mimir-backend.*)"} / process_memory_map_areas_limit{job=~".*/(ingester.*|cortex|mimir|mimir-write.*|store-gateway.*|cortex|mimir|mimir-backend.*)"} > 0.8 - for: 5m - labels: - severity: critical - - alert: MimirIngesterInstanceHasNoTenants - annotations: - message: Mimir ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has no tenants assigned. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterinstancehasnotenants - expr: | - (min by(cluster, namespace, pod) (cortex_ingester_memory_users) == 0) - and on (cluster, namespace) - # Only if there are more time-series than would be expected due to continuous testing load - ( - sum by(cluster, namespace) (cortex_ingester_memory_series) - / - max by(cluster, namespace) (cortex_distributor_replication_factor) - ) > 100000 - for: 1h - labels: - severity: warning - - alert: MimirRulerInstanceHasNoRuleGroups - annotations: - message: Mimir ruler {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has no rule groups assigned. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulerinstancehasnorulegroups - expr: | - # Alert on ruler instances in microservices mode that have no rule groups assigned, - min by(cluster, namespace, pod) (cortex_ruler_managers_total{pod=~"(.*mimir-)?ruler.*"}) == 0 - # but only if other ruler instances of the same cell do have rule groups assigned - and on (cluster, namespace) - (max by(cluster, namespace) (cortex_ruler_managers_total) > 0) - # and there are more than two instances overall - and on (cluster, namespace) - (count by (cluster, namespace) (cortex_ruler_managers_total) > 2) - for: 1h - labels: - severity: warning - - alert: MimirIngestedDataTooFarInTheFuture - annotations: - message: Mimir ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has ingested samples with timestamps more than 1h in the future. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesteddatatoofarinthefuture - expr: | - max by(cluster, namespace, pod) ( - cortex_ingester_tsdb_head_max_timestamp_seconds - time() - and - cortex_ingester_tsdb_head_max_timestamp_seconds > 0 - ) > 60*60 - for: 5m - labels: - severity: warning - - alert: MimirRingMembersMismatch - annotations: - message: | - Number of members in Mimir ingester hash ring does not match the expected number in {{ $labels.cluster }}/{{ $labels.namespace }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirringmembersmismatch - expr: | - ( - avg by(cluster, namespace) (sum by(cluster, namespace, pod) (cortex_ring_members{name="ingester",job=~".*/(ingester.*|cortex|mimir|mimir-write.*)"})) - != sum by(cluster, namespace) (up{job=~".*/(ingester.*|cortex|mimir|mimir-write.*)"}) - ) - and - ( - count by(cluster, namespace) (cortex_build_info) > 0 - ) - for: 15m - labels: - component: ingester - severity: warning - - name: mimir_instance_limits_alerts - rules: - - alert: MimirIngesterReachingSeriesLimit - annotations: - message: | - Ingester {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its series limit. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterreachingserieslimit - expr: | - ( - (cortex_ingester_memory_series / ignoring(limit) cortex_ingester_instance_limits{limit="max_series"}) - and ignoring (limit) - (cortex_ingester_instance_limits{limit="max_series"} > 0) - ) > 0.8 - for: 3h - labels: - severity: warning - - alert: MimirIngesterReachingSeriesLimit - annotations: - message: | - Ingester {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its series limit. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterreachingserieslimit - expr: | - ( - (cortex_ingester_memory_series / ignoring(limit) cortex_ingester_instance_limits{limit="max_series"}) - and ignoring (limit) - (cortex_ingester_instance_limits{limit="max_series"} > 0) - ) > 0.9 - for: 5m - labels: - severity: critical - - alert: MimirIngesterReachingTenantsLimit - annotations: - message: | - Ingester {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its tenant limit. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterreachingtenantslimit - expr: | - ( - (cortex_ingester_memory_users / ignoring(limit) cortex_ingester_instance_limits{limit="max_tenants"}) - and ignoring (limit) - (cortex_ingester_instance_limits{limit="max_tenants"} > 0) - ) > 0.7 - for: 5m - labels: - severity: warning - - alert: MimirIngesterReachingTenantsLimit - annotations: - message: | - Ingester {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its tenant limit. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterreachingtenantslimit - expr: | - ( - (cortex_ingester_memory_users / ignoring(limit) cortex_ingester_instance_limits{limit="max_tenants"}) - and ignoring (limit) - (cortex_ingester_instance_limits{limit="max_tenants"} > 0) - ) > 0.8 - for: 5m - labels: - severity: critical - - alert: MimirReachingTCPConnectionsLimit - annotations: - message: | - Mimir instance {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its TCP connections limit for {{ $labels.protocol }} protocol. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirreachingtcpconnectionslimit - expr: | - cortex_tcp_connections / cortex_tcp_connections_limit > 0.8 and - cortex_tcp_connections_limit > 0 - for: 5m - labels: - severity: critical - - alert: MimirDistributorReachingInflightPushRequestLimit - annotations: - message: | - Distributor {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its inflight push request limit. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirdistributorreachinginflightpushrequestlimit - expr: | - ( - (cortex_distributor_inflight_push_requests / ignoring(limit) cortex_distributor_instance_limits{limit="max_inflight_push_requests"}) - and ignoring (limit) - (cortex_distributor_instance_limits{limit="max_inflight_push_requests"} > 0) - ) > 0.8 - for: 5m - labels: - severity: critical - - name: mimir-rollout-alerts - rules: - - alert: MimirRolloutStuck - annotations: - message: | - The {{ $labels.rollout_group }} rollout is stuck in {{ $labels.cluster }}/{{ $labels.namespace }}. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrolloutstuck - expr: | - ( - max without (revision) ( - sum without(statefulset) (label_replace(kube_statefulset_status_current_revision, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?")) - unless - sum without(statefulset) (label_replace(kube_statefulset_status_update_revision, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?")) - ) - * - ( - sum without(statefulset) (label_replace(kube_statefulset_replicas, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?")) - != - sum without(statefulset) (label_replace(kube_statefulset_status_replicas_updated, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?")) - ) - ) and ( - changes(sum without(statefulset) (label_replace(kube_statefulset_status_replicas_updated, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?"))[15m:1m]) - == - 0 - ) - * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - for: 30m - labels: - severity: warning - workload_type: statefulset - - alert: MimirRolloutStuck - annotations: - message: | - The {{ $labels.rollout_group }} rollout is stuck in {{ $labels.cluster }}/{{ $labels.namespace }}. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrolloutstuck - expr: | - ( - sum without(deployment) (label_replace(kube_deployment_spec_replicas, "rollout_group", "$1", "deployment", "(.*?)(?:-zone-[a-z])?")) - != - sum without(deployment) (label_replace(kube_deployment_status_replicas_updated, "rollout_group", "$1", "deployment", "(.*?)(?:-zone-[a-z])?")) - ) and ( - changes(sum without(deployment) (label_replace(kube_deployment_status_replicas_updated, "rollout_group", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"))[15m:1m]) - == - 0 - ) - * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - for: 30m - labels: - severity: warning - workload_type: deployment - - alert: RolloutOperatorNotReconciling - annotations: - message: | - Rollout operator is not reconciling the rollout group {{ $labels.rollout_group }} in {{ $labels.cluster }}/{{ $labels.namespace }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#rolloutoperatornotreconciling - expr: | - max by(cluster, namespace, rollout_group) (time() - rollout_operator_last_successful_group_reconcile_timestamp_seconds) > 600 - for: 5m - labels: - severity: critical - - name: mimir-provisioning - rules: - - alert: MimirAllocatingTooMuchMemory - annotations: - message: | - Instance {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is using too much memory. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirallocatingtoomuchmemory - expr: | - ( - # We use RSS instead of working set memory because of the ingester's extensive usage of mmap. - # See: https://github.com/grafana/mimir/issues/2466 - container_memory_rss{container=~"(ingester|mimir-write|mimir-backend)"} - / - ( container_spec_memory_limit_bytes{container=~"(ingester|mimir-write|mimir-backend)"} > 0 ) - ) - # Match only Mimir namespaces. 
- * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - > 0.65 - for: 15m - labels: - severity: warning - - alert: MimirAllocatingTooMuchMemory - annotations: - message: | - Instance {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is using too much memory. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirallocatingtoomuchmemory - expr: | - ( - # We use RSS instead of working set memory because of the ingester's extensive usage of mmap. - # See: https://github.com/grafana/mimir/issues/2466 - container_memory_rss{container=~"(ingester|mimir-write|mimir-backend)"} - / - ( container_spec_memory_limit_bytes{container=~"(ingester|mimir-write|mimir-backend)"} > 0 ) - ) - # Match only Mimir namespaces. - * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - > 0.8 - for: 15m - labels: - severity: critical - - name: ruler_alerts - rules: - - alert: MimirRulerTooManyFailedPushes - annotations: - message: | - Mimir Ruler {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% write (push) errors. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulertoomanyfailedpushes - expr: | - 100 * ( - sum by (cluster, namespace, pod) (rate(cortex_ruler_write_requests_failed_total[1m])) - / - sum by (cluster, namespace, pod) (rate(cortex_ruler_write_requests_total[1m])) - ) > 1 - for: 5m - labels: - severity: critical - - alert: MimirRulerTooManyFailedQueries - annotations: - message: | - Mimir Ruler {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% errors while evaluating rules. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulertoomanyfailedqueries - expr: | - 100 * ( - sum by (cluster, namespace, pod) (rate(cortex_ruler_queries_failed_total[1m])) - / - sum by (cluster, namespace, pod) (rate(cortex_ruler_queries_total[1m])) - ) > 1 - for: 5m - labels: - severity: critical - - alert: MimirRulerMissedEvaluations - annotations: - message: | - Mimir Ruler {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% missed iterations for the rule group {{ $labels.rule_group }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulermissedevaluations - expr: | - 100 * ( - sum by (cluster, namespace, pod, rule_group) (rate(cortex_prometheus_rule_group_iterations_missed_total[1m])) - / - sum by (cluster, namespace, pod, rule_group) (rate(cortex_prometheus_rule_group_iterations_total[1m])) - ) > 1 - for: 5m - labels: - severity: warning - - alert: MimirRulerFailedRingCheck - annotations: - message: | - Mimir Rulers in {{ $labels.cluster }}/{{ $labels.namespace }} are experiencing errors when checking the ring for rule group ownership. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulerfailedringcheck - expr: | - sum by (cluster, namespace, job) (rate(cortex_ruler_ring_check_errors_total[1m])) - > 0 - for: 5m - labels: - severity: critical - - alert: MimirRulerRemoteEvaluationFailing - annotations: - message: | - Mimir rulers in {{ $labels.cluster }}/{{ $labels.namespace }} are failing to perform {{ printf "%.2f" $value }}% of remote evaluations through the ruler-query-frontend. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulerremoteevaluationfailing - expr: | - 100 * ( - sum by (cluster, namespace) (rate(cortex_request_duration_seconds_count{route="/httpgrpc.HTTP/Handle", status_code=~"5..", job=~".*/(ruler-query-frontend.*)"}[5m])) - / - sum by (cluster, namespace) (rate(cortex_request_duration_seconds_count{route="/httpgrpc.HTTP/Handle", job=~".*/(ruler-query-frontend.*)"}[5m])) - ) > 1 - for: 5m - labels: - severity: warning - - name: gossip_alerts - rules: - - alert: MimirGossipMembersTooHigh - annotations: - message: One or more Mimir instances in {{ $labels.cluster }}/{{ $labels.namespace - }} consistently sees a higher than expected number of gossip members. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirgossipmemberstoohigh - expr: | - max by (cluster, namespace) (memberlist_client_cluster_members_count) - > - (sum by (cluster, namespace) (up{job=~".+/(admin-api|alertmanager|compactor.*|distributor|ingester.*|querier.*|ruler|ruler-querier.*|store-gateway.*|cortex|mimir|mimir-write.*|mimir-read.*|mimir-backend.*)"}) + 10) - for: 20m - labels: - severity: warning - - alert: MimirGossipMembersTooLow - annotations: - message: One or more Mimir instances in {{ $labels.cluster }}/{{ $labels.namespace - }} consistently sees a lower than expected number of gossip members. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirgossipmemberstoolow - expr: | - min by (cluster, namespace) (memberlist_client_cluster_members_count) - < - (sum by (cluster, namespace) (up{job=~".+/(admin-api|alertmanager|compactor.*|distributor|ingester.*|querier.*|ruler|ruler-querier.*|store-gateway.*|cortex|mimir|mimir-write.*|mimir-read.*|mimir-backend.*)"}) * 0.5) - for: 20m - labels: - severity: warning - - name: etcd_alerts - rules: - - alert: EtcdAllocatingTooMuchMemory - annotations: - message: | - Too much memory being used by {{ $labels.namespace }}/{{ $labels.pod }} - bump memory limit. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#etcdallocatingtoomuchmemory - expr: | - ( - container_memory_working_set_bytes{container="etcd"} - / - ( container_spec_memory_limit_bytes{container="etcd"} > 0 ) - ) > 0.65 - for: 15m - labels: - severity: warning - - alert: EtcdAllocatingTooMuchMemory - annotations: - message: | - Too much memory being used by {{ $labels.namespace }}/{{ $labels.pod }} - bump memory limit. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#etcdallocatingtoomuchmemory - expr: | - ( - container_memory_working_set_bytes{container="etcd"} - / - ( container_spec_memory_limit_bytes{container="etcd"} > 0 ) - ) > 0.8 - for: 15m - labels: - severity: critical - - name: alertmanager_alerts - rules: - - alert: MimirAlertmanagerSyncConfigsFailing - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is failing to read tenant configurations from storage. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagersyncconfigsfailing - expr: | - rate(cortex_alertmanager_sync_configs_failed_total[5m]) > 0 - for: 30m - labels: - severity: critical - - alert: MimirAlertmanagerRingCheckFailing - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is unable to check tenants ownership via the ring. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerringcheckfailing - expr: | - rate(cortex_alertmanager_ring_check_errors_total[2m]) > 0 - for: 10m - labels: - severity: critical - - alert: MimirAlertmanagerPartialStateMergeFailing - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is failing to merge partial state changes received from a replica. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerpartialstatemergefailing - expr: | - rate(cortex_alertmanager_partial_state_merges_failed_total[2m]) > 0 - for: 10m - labels: - severity: critical - - alert: MimirAlertmanagerReplicationFailing - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is failing to replicating partial state to its replicas. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerreplicationfailing - expr: | - rate(cortex_alertmanager_state_replication_failed_total[2m]) > 0 - for: 10m - labels: - severity: critical - - alert: MimirAlertmanagerPersistStateFailing - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is unable to persist full state snaphots to remote storage. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerpersiststatefailing - expr: | - rate(cortex_alertmanager_state_persist_failed_total[15m]) > 0 - for: 1h - labels: - severity: critical - - alert: MimirAlertmanagerInitialSyncFailed - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} was unable to obtain some initial state when starting up. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerinitialsyncfailed - expr: | - increase(cortex_alertmanager_state_initial_sync_completed_total{outcome="failed"}[1m]) > 0 - labels: - severity: critical - - alert: MimirAlertmanagerAllocatingTooMuchMemory - annotations: - message: | - Alertmanager {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is using too much memory. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerallocatingtoomuchmemory - expr: | - (container_memory_working_set_bytes{container="alertmanager"} / container_spec_memory_limit_bytes{container="alertmanager"}) > 0.80 - and - (container_spec_memory_limit_bytes{container="alertmanager"} > 0) - for: 15m - labels: - severity: warning - - alert: MimirAlertmanagerAllocatingTooMuchMemory - annotations: - message: | - Alertmanager {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is using too much memory. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerallocatingtoomuchmemory - expr: | - (container_memory_working_set_bytes{container="alertmanager"} / container_spec_memory_limit_bytes{container="alertmanager"}) > 0.90 - and - (container_spec_memory_limit_bytes{container="alertmanager"} > 0) - for: 15m - labels: - severity: critical - - alert: MimirAlertmanagerInstanceHasNoTenants - annotations: - message: Mimir alertmanager {{ $labels.pod }} in {{ $labels.cluster }}/{{ - $labels.namespace }} owns no tenants. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerinstancehasnotenants - expr: | - # Alert on alertmanager instances in microservices mode that own no tenants, - min by(cluster, namespace, pod) (cortex_alertmanager_tenants_owned{pod=~"(.*mimir-)?alertmanager.*"}) == 0 - # but only if other instances of the same cell do have tenants assigned. - and on (cluster, namespace) - max by(cluster, namespace) (cortex_alertmanager_tenants_owned) > 0 - for: 1h - labels: - severity: warning - - name: mimir_blocks_alerts - rules: - - alert: MimirIngesterHasNotShippedBlocks - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not shipped any block in the last 4 hours. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterhasnotshippedblocks - expr: | - (min by(cluster, namespace, pod) (time() - cortex_ingester_shipper_last_successful_upload_timestamp_seconds) > 60 * 60 * 4) - and - (max by(cluster, namespace, pod) (cortex_ingester_shipper_last_successful_upload_timestamp_seconds) > 0) - and - # Only if the ingester has ingested samples over the last 4h. - (max by(cluster, namespace, pod) (max_over_time(cluster_namespace_pod:cortex_ingester_ingested_samples_total:rate1m[4h])) > 0) - and - # Only if the ingester was ingesting samples 4h ago. 
This protects against the case where the ingester replica - # had ingested samples in the past, then no traffic was received for a long period and then it starts - # receiving samples again. Without this check, the alert would fire as soon as it gets back receiving - # samples, while the a block shipping is expected within the next 4h. - (max by(cluster, namespace, pod) (max_over_time(cluster_namespace_pod:cortex_ingester_ingested_samples_total:rate1m[1h] offset 4h)) > 0) - for: 15m - labels: - severity: critical - - alert: MimirIngesterHasNotShippedBlocksSinceStart - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not shipped any block in the last 4 hours. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterhasnotshippedblockssincestart - expr: | - (max by(cluster, namespace, pod) (cortex_ingester_shipper_last_successful_upload_timestamp_seconds) == 0) - and - (max by(cluster, namespace, pod) (max_over_time(cluster_namespace_pod:cortex_ingester_ingested_samples_total:rate1m[4h])) > 0) - for: 4h - labels: - severity: critical - - alert: MimirIngesterHasUnshippedBlocks - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has compacted a block {{ $value | humanizeDuration }} ago but it hasn't - been successfully uploaded to the storage yet. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterhasunshippedblocks - expr: | - (time() - cortex_ingester_oldest_unshipped_block_timestamp_seconds > 3600) - and - (cortex_ingester_oldest_unshipped_block_timestamp_seconds > 0) - for: 15m - labels: - severity: critical - - alert: MimirIngesterTSDBHeadCompactionFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to compact TSDB head. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbheadcompactionfailed - expr: | - rate(cortex_ingester_tsdb_compactions_failed_total[5m]) > 0 - for: 15m - labels: - severity: critical - - alert: MimirIngesterTSDBHeadTruncationFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to truncate TSDB head. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbheadtruncationfailed - expr: | - rate(cortex_ingester_tsdb_head_truncations_failed_total[5m]) > 0 - labels: - severity: critical - - alert: MimirIngesterTSDBCheckpointCreationFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to create TSDB checkpoint. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbcheckpointcreationfailed - expr: | - rate(cortex_ingester_tsdb_checkpoint_creations_failed_total[5m]) > 0 - labels: - severity: critical - - alert: MimirIngesterTSDBCheckpointDeletionFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to delete TSDB checkpoint. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbcheckpointdeletionfailed - expr: | - rate(cortex_ingester_tsdb_checkpoint_deletions_failed_total[5m]) > 0 - labels: - severity: critical - - alert: MimirIngesterTSDBWALTruncationFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to truncate TSDB WAL. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbwaltruncationfailed - expr: | - rate(cortex_ingester_tsdb_wal_truncations_failed_total[5m]) > 0 - labels: - severity: warning - - alert: MimirIngesterTSDBWALCorrupted - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} got a corrupted TSDB WAL. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbwalcorrupted - expr: | - # alert when there are more than one corruptions - count by (cluster, namespace) (rate(cortex_ingester_tsdb_wal_corruptions_total[5m]) > 0) > 1 - and - # and there is only one zone - count by (cluster, namespace) (group by (cluster, namespace, job) (cortex_ingester_tsdb_wal_corruptions_total)) == 1 - labels: - deployment: single-zone - severity: critical - - alert: MimirIngesterTSDBWALCorrupted - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} got a corrupted TSDB WAL. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbwalcorrupted - expr: | - # alert when there are more than one corruptions - count by (cluster, namespace) (sum by (cluster, namespace, job) (rate(cortex_ingester_tsdb_wal_corruptions_total[5m]) > 0)) > 1 - and - # and there are multiple zones - count by (cluster, namespace) (group by (cluster, namespace, job) (cortex_ingester_tsdb_wal_corruptions_total)) > 1 - labels: - deployment: multi-zone - severity: critical - - alert: MimirIngesterTSDBWALWritesFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to write to TSDB WAL. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbwalwritesfailed - expr: | - rate(cortex_ingester_tsdb_wal_writes_failed_total[1m]) > 0 - for: 3m - labels: - severity: critical - - alert: MimirStoreGatewayHasNotSyncTheBucket - annotations: - message: Mimir store-gateway {{ $labels.pod }} in {{ $labels.cluster }}/{{ - $labels.namespace }} has not successfully synched the bucket since {{ $value - | humanizeDuration }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirstoregatewayhasnotsyncthebucket - expr: | - (time() - cortex_bucket_stores_blocks_last_successful_sync_timestamp_seconds{component="store-gateway"} > 60 * 30) - and - cortex_bucket_stores_blocks_last_successful_sync_timestamp_seconds{component="store-gateway"} > 0 - for: 5m - labels: - severity: critical - - alert: MimirStoreGatewayNoSyncedTenants - annotations: - message: Mimir store-gateway {{ $labels.pod }} in {{ $labels.cluster }}/{{ - $labels.namespace }} is not syncing any blocks for any tenant. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirstoregatewaynosyncedtenants - expr: | - min by(cluster, namespace, pod) (cortex_bucket_stores_tenants_synced{component="store-gateway"}) == 0 - for: 1h - labels: - severity: warning - - alert: MimirBucketIndexNotUpdated - annotations: - message: Mimir bucket index for tenant {{ $labels.user }} in {{ $labels.cluster - }}/{{ $labels.namespace }} has not been updated since {{ $value | humanizeDuration - }}. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirbucketindexnotupdated - expr: | - min by(cluster, namespace, user) (time() - cortex_bucket_index_last_successful_update_timestamp_seconds) > 7200 - labels: - severity: critical - - name: mimir_compactor_alerts - rules: - - alert: MimirCompactorHasNotSuccessfullyCleanedUpBlocks - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not successfully cleaned up blocks in the last 6 hours. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotsuccessfullycleanedupblocks - expr: | - # The "last successful run" metric is updated even if the compactor owns no tenants, - # so this alert correctly doesn't fire if compactor has nothing to do. - (time() - cortex_compactor_block_cleanup_last_successful_run_timestamp_seconds > 60 * 60 * 6) - for: 1h - labels: - severity: critical - - alert: MimirCompactorHasNotSuccessfullyRunCompaction - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not run compaction in the last 24 hours. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotsuccessfullyruncompaction - expr: | - # The "last successful run" metric is updated even if the compactor owns no tenants, - # so this alert correctly doesn't fire if compactor has nothing to do. - (time() - cortex_compactor_last_successful_run_timestamp_seconds > 60 * 60 * 24) - and - (cortex_compactor_last_successful_run_timestamp_seconds > 0) - for: 1h - labels: - reason: in-last-24h - severity: critical - - alert: MimirCompactorHasNotSuccessfullyRunCompaction - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not run compaction in the last 24 hours. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotsuccessfullyruncompaction - expr: | - # The "last successful run" metric is updated even if the compactor owns no tenants, - # so this alert correctly doesn't fire if compactor has nothing to do. - cortex_compactor_last_successful_run_timestamp_seconds == 0 - for: 24h - labels: - reason: since-startup - severity: critical - - alert: MimirCompactorHasNotSuccessfullyRunCompaction - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} failed to run 2 consecutive compactions. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotsuccessfullyruncompaction - expr: | - increase(cortex_compactor_runs_failed_total{reason!="shutdown"}[2h]) >= 2 - labels: - reason: consecutive-failures - severity: critical - - alert: MimirCompactorHasNotUploadedBlocks - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not uploaded any block in the last 24 hours. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotuploadedblocks - expr: | - (time() - (max by(cluster, namespace, pod) (thanos_objstore_bucket_last_successful_upload_time{component="compactor"})) > 60 * 60 * 24) - and - (max by(cluster, namespace, pod) (thanos_objstore_bucket_last_successful_upload_time{component="compactor"}) > 0) - and - # Only if some compactions have started. We don't want to fire this alert if the compactor has nothing to do - # (e.g. there are more replicas than required because running as part of mimir-backend). 
- (sum by(cluster, namespace, pod) (rate(cortex_compactor_group_compaction_runs_started_total[24h])) > 0) - for: 15m - labels: - severity: critical - time_period: 24h - - alert: MimirCompactorHasNotUploadedBlocks - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not uploaded any block since its start. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotuploadedblocks - expr: | - (max by(cluster, namespace, pod) (thanos_objstore_bucket_last_successful_upload_time{component="compactor"}) == 0) - and - # Only if some compactions have started. We don't want to fire this alert if the compactor has nothing to do - # (e.g. there are more replicas than required because running as part of mimir-backend). - (sum by(cluster, namespace, pod) (rate(cortex_compactor_group_compaction_runs_started_total[24h])) > 0) - for: 24h - labels: - severity: critical - time_period: since-start - - alert: MimirCompactorSkippedUnhealthyBlocks - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has found and ignored unhealthy blocks. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorskippedunhealthyblocks - expr: | - increase(cortex_compactor_blocks_marked_for_no_compaction_total[5m]) > 0 - for: 1m - labels: - severity: warning - - alert: MimirCompactorSkippedUnhealthyBlocks - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has found and ignored unhealthy blocks. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorskippedunhealthyblocks - expr: | - increase(cortex_compactor_blocks_marked_for_no_compaction_total[5m]) > 1 - for: 30m - labels: - severity: critical - - name: mimir_autoscaling - rules: - - alert: MimirAutoscalerNotActive - annotations: - message: The Horizontal Pod Autoscaler (HPA) {{ $labels.horizontalpodautoscaler - }} in {{ $labels.namespace }} is not active. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirautoscalernotactive - expr: | - ( - label_replace(( - kube_horizontalpodautoscaler_status_condition{condition="ScalingActive",status="false"} - # Match only Mimir namespaces. - * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - # Add "metric" label. - + on(cluster, namespace, horizontalpodautoscaler) group_right label_replace(kube_horizontalpodautoscaler_spec_target_metric*0, "metric", "$1", "metric_name", "(.+)") - > 0), - "scaledObject", "$1", "horizontalpodautoscaler", "keda-hpa-(.*)" - ) - ) - # Alert only if the scaling metric exists and is > 0. If the KEDA ScaledObject is configured to scale down 0, - # then HPA ScalingActive may be false when expected to run 0 replicas. In this case, the scaling metric exported - # by KEDA could not exist at all or being exposed with a value of 0. - and on (cluster, namespace, metric, scaledObject) - (label_replace(keda_scaler_metrics_value, "namespace", "$0", "exported_namespace", ".+") > 0) - for: 1h - labels: - severity: critical - - alert: MimirAutoscalerKedaFailing - annotations: - message: The Keda ScaledObject {{ $labels.scaledObject }} in {{ $labels.namespace - }} is experiencing errors. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirautoscalerkedafailing - expr: | - ( - # Find KEDA scalers reporting errors. 
- label_replace(rate(keda_scaler_errors[5m]), "namespace", "$1", "exported_namespace", "(.*)") - # Match only Mimir namespaces. - * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - ) - > 0 - for: 1h - labels: - severity: critical - - name: mimir_continuous_test - rules: - - alert: MimirContinuousTestNotRunningOnWrites - annotations: - message: Mimir continuous test {{ $labels.test }} in {{ $labels.cluster }}/{{ - $labels.namespace }} is not effectively running because writes are failing. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircontinuoustestnotrunningonwrites - expr: | - sum by(cluster, namespace, test) (rate(mimir_continuous_test_writes_failed_total[5m])) > 0 - for: 1h - labels: - severity: warning - - alert: MimirContinuousTestNotRunningOnReads - annotations: - message: Mimir continuous test {{ $labels.test }} in {{ $labels.cluster }}/{{ - $labels.namespace }} is not effectively running because queries are failing. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircontinuoustestnotrunningonreads - expr: | - sum by(cluster, namespace, test) (rate(mimir_continuous_test_queries_failed_total[5m])) > 0 - for: 1h - labels: - severity: warning - - alert: MimirContinuousTestFailed + minReadySeconds: 10 + selector: + matchLabels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + template: + metadata: annotations: - message: Mimir continuous test {{ $labels.test }} in {{ $labels.cluster }}/{{ - $labels.namespace }} failed when asserting query results. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircontinuoustestfailed - expr: | - sum by(cluster, namespace, test) (rate(mimir_continuous_test_query_result_checks_failed_total[10m])) > 0 + kubectl.kubernetes.io/default-container: grafana-agent + logs.agent.grafana.com/scrape: "true" + logs.agent.grafana.com/scrub-level: debug + profiles.grafana.com/cpu.port_name: http-metrics + profiles.grafana.com/cpu.scrape: "false" + profiles.grafana.com/goroutine.port_name: http-metrics + profiles.grafana.com/goroutine.scrape: "false" + profiles.grafana.com/memory.port_name: http-metrics + profiles.grafana.com/memory.scrape: "false" + pyroscope.io/service_name: grafana-agent labels: - severity: warning + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + spec: + containers: + - args: + - run + - /etc/agent/config.river + - --storage.path=/tmp/agent + - --server.http.listen-addr=0.0.0.0:80 + - --server.http.ui-path-prefix=/ + - --disable-reporting + - --cluster.enabled=true + - --cluster.join-addresses=grafana-agent-cluster + env: + - name: AGENT_MODE + value: flow + - name: AGENT_DEPLOY_MODE + value: helm + - name: HOSTNAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + envFrom: + - secretRef: + name: agent-env + optional: true + image: docker.io/grafana/agent:v0.40.3 + imagePullPolicy: IfNotPresent + name: grafana-agent + ports: + - containerPort: 80 + name: http-metrics + - containerPort: 4317 + name: grpc-otlp + protocol: TCP + - containerPort: 4318 + name: http-otlp + protocol: TCP + - containerPort: 9411 + name: zipkin + protocol: TCP + - containerPort: 6831 + name: jaeger-compact + protocol: UDP + readinessProbe: + httpGet: + path: /-/ready + port: 80 + scheme: HTTP + initialDelaySeconds: 10 + timeoutSeconds: 1 + volumeMounts: + - mountPath: /etc/agent + name: config + - mountPath: /var/log + name: varlog + readOnly: true + - mountPath: /etc/agent-modules + name: agent-modules + - args: + - 
--volume-dir=/etc/agent + - --webhook-url=http://localhost:80/-/reload + image: ghcr.io/jimmidyson/configmap-reload:v0.12.0 + name: config-reloader + resources: + requests: + cpu: 1m + memory: 5Mi + volumeMounts: + - mountPath: /etc/agent + name: config + dnsPolicy: ClusterFirst + nodeSelector: + kubernetes.io/os: linux + serviceAccountName: grafana-agent + volumes: + - configMap: + name: agent-config-h9mgdthkmd + name: config + - hostPath: + path: /var/log + name: varlog + - configMap: + name: agent-modules-cf8t5bf7t9 + name: agent-modules + updateStrategy: + rollingUpdate: + maxSurge: 0 + maxUnavailable: 2 + type: RollingUpdate --- apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule +kind: ServiceMonitor metadata: - name: mimir-mixin-rules + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent namespace: monitoring-system spec: - groups: - - name: mimir_api_1 - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_request_duration_seconds:50quantile - - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job) / - sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job) - record: cluster_job:cortex_request_duration_seconds:avg - - expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, - job) - record: cluster_job:cortex_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job) - record: cluster_job:cortex_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_request_duration_seconds_count[1m])) 
by (cluster, job) - record: cluster_job:cortex_request_duration_seconds_count:sum_rate - - expr: sum(rate(cortex_request_duration_seconds[1m])) by (cluster, job) - record: cluster_job:cortex_request_duration_seconds:sum_rate - - name: mimir_api_2 - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, job, route)) - record: cluster_job_route:cortex_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, job, route)) - record: cluster_job_route:cortex_request_duration_seconds:50quantile - - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job, route) - / sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job, route) - record: cluster_job_route:cortex_request_duration_seconds:avg - - expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, - job, route) - record: cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job, route) - record: cluster_job_route:cortex_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job, - route) - record: cluster_job_route:cortex_request_duration_seconds_count:sum_rate - - expr: sum(rate(cortex_request_duration_seconds[1m])) by (cluster, job, route) - record: cluster_job_route:cortex_request_duration_seconds:sum_rate - - name: mimir_api_3 - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, namespace, job, route)) - record: cluster_namespace_job_route:cortex_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, namespace, job, route)) - record: cluster_namespace_job_route:cortex_request_duration_seconds:50quantile - - expr: 
sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, namespace, - job, route) / sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, - namespace, job, route) - record: cluster_namespace_job_route:cortex_request_duration_seconds:avg - - expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, - namespace, job, route) - record: cluster_namespace_job_route:cortex_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, namespace, - job, route) - record: cluster_namespace_job_route:cortex_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, namespace, - job, route) - record: cluster_namespace_job_route:cortex_request_duration_seconds_count:sum_rate - - expr: sum(rate(cortex_request_duration_seconds[1m])) by (cluster, namespace, - job, route) - record: cluster_namespace_job_route:cortex_request_duration_seconds:sum_rate - - name: mimir_querier_api - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_querier_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_querier_request_duration_seconds:50quantile - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - job) / sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, - job) - record: cluster_job:cortex_querier_request_duration_seconds:avg - - expr: sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, - cluster, job) - record: cluster_job:cortex_querier_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - job) - record: cluster_job:cortex_querier_request_duration_seconds_sum:sum_rate - - 
expr: sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, - job) - record: cluster_job:cortex_querier_request_duration_seconds_count:sum_rate - - expr: histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, job, route)) - record: cluster_job_route:cortex_querier_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, job, route)) - record: cluster_job_route:cortex_querier_request_duration_seconds:50quantile - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - job, route) / sum(rate(cortex_querier_request_duration_seconds_count[1m])) - by (cluster, job, route) - record: cluster_job_route:cortex_querier_request_duration_seconds:avg - - expr: sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, - cluster, job, route) - record: cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - job, route) - record: cluster_job_route:cortex_querier_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, - job, route) - record: cluster_job_route:cortex_querier_request_duration_seconds_count:sum_rate - - expr: histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, namespace, job, route)) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, namespace, job, route)) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds:50quantile - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - namespace, job, route) / 
sum(rate(cortex_querier_request_duration_seconds_count[1m])) - by (cluster, namespace, job, route) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds:avg - - expr: sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, - cluster, namespace, job, route) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - namespace, job, route) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, - namespace, job, route) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds_count:sum_rate - - name: mimir_storage - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_kv_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_kv_request_duration_seconds:50quantile - - expr: sum(rate(cortex_kv_request_duration_seconds_sum[1m])) by (cluster, job) - / sum(rate(cortex_kv_request_duration_seconds_count[1m])) by (cluster, job) - record: cluster_job:cortex_kv_request_duration_seconds:avg - - expr: sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) by (le, cluster, - job) - record: cluster_job:cortex_kv_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_kv_request_duration_seconds_sum[1m])) by (cluster, job) - record: cluster_job:cortex_kv_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_kv_request_duration_seconds_count[1m])) by (cluster, job) - record: cluster_job:cortex_kv_request_duration_seconds_count:sum_rate - - name: mimir_queries - rules: - - expr: histogram_quantile(0.99, 
sum(rate(cortex_query_frontend_retries_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_query_frontend_retries:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_query_frontend_retries_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_query_frontend_retries:50quantile - - expr: sum(rate(cortex_query_frontend_retries_sum[1m])) by (cluster, job) / sum(rate(cortex_query_frontend_retries_count[1m])) - by (cluster, job) - record: cluster_job:cortex_query_frontend_retries:avg - - expr: sum(rate(cortex_query_frontend_retries_bucket[1m])) by (le, cluster, job) - record: cluster_job:cortex_query_frontend_retries_bucket:sum_rate - - expr: sum(rate(cortex_query_frontend_retries_sum[1m])) by (cluster, job) - record: cluster_job:cortex_query_frontend_retries_sum:sum_rate - - expr: sum(rate(cortex_query_frontend_retries_count[1m])) by (cluster, job) - record: cluster_job:cortex_query_frontend_retries_count:sum_rate - - expr: histogram_quantile(0.99, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_query_frontend_queue_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_query_frontend_queue_duration_seconds:50quantile - - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_sum[1m])) by (cluster, - job) / sum(rate(cortex_query_frontend_queue_duration_seconds_count[1m])) by - (cluster, job) - record: cluster_job:cortex_query_frontend_queue_duration_seconds:avg - - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) by - (le, cluster, job) - record: cluster_job:cortex_query_frontend_queue_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_sum[1m])) by (cluster, - job) - record: cluster_job:cortex_query_frontend_queue_duration_seconds_sum:sum_rate - - expr: 
sum(rate(cortex_query_frontend_queue_duration_seconds_count[1m])) by (cluster, - job) - record: cluster_job:cortex_query_frontend_queue_duration_seconds_count:sum_rate - - name: mimir_ingester_queries - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_series_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_series:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_series_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_series:50quantile - - expr: sum(rate(cortex_ingester_queried_series_sum[1m])) by (cluster, job) / - sum(rate(cortex_ingester_queried_series_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_series:avg - - expr: sum(rate(cortex_ingester_queried_series_bucket[1m])) by (le, cluster, - job) - record: cluster_job:cortex_ingester_queried_series_bucket:sum_rate - - expr: sum(rate(cortex_ingester_queried_series_sum[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_series_sum:sum_rate - - expr: sum(rate(cortex_ingester_queried_series_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_series_count:sum_rate - - expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_samples_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_samples:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_samples_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_samples:50quantile - - expr: sum(rate(cortex_ingester_queried_samples_sum[1m])) by (cluster, job) / - sum(rate(cortex_ingester_queried_samples_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_samples:avg - - expr: sum(rate(cortex_ingester_queried_samples_bucket[1m])) by (le, cluster, - job) - record: cluster_job:cortex_ingester_queried_samples_bucket:sum_rate - - expr: 
sum(rate(cortex_ingester_queried_samples_sum[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_samples_sum:sum_rate - - expr: sum(rate(cortex_ingester_queried_samples_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_samples_count:sum_rate - - expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_exemplars_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_exemplars:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_exemplars_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_exemplars:50quantile - - expr: sum(rate(cortex_ingester_queried_exemplars_sum[1m])) by (cluster, job) - / sum(rate(cortex_ingester_queried_exemplars_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_exemplars:avg - - expr: sum(rate(cortex_ingester_queried_exemplars_bucket[1m])) by (le, cluster, - job) - record: cluster_job:cortex_ingester_queried_exemplars_bucket:sum_rate - - expr: sum(rate(cortex_ingester_queried_exemplars_sum[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_exemplars_sum:sum_rate - - expr: sum(rate(cortex_ingester_queried_exemplars_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_exemplars_count:sum_rate - - name: mimir_received_samples - rules: - - expr: | - sum by (cluster, namespace, job) (rate(cortex_distributor_received_samples_total[5m])) - record: cluster_namespace_job:cortex_distributor_received_samples:rate5m - - name: mimir_exemplars_in - rules: - - expr: | - sum by (cluster, namespace, job) (rate(cortex_distributor_exemplars_in_total[5m])) - record: cluster_namespace_job:cortex_distributor_exemplars_in:rate5m - - name: mimir_received_exemplars - rules: - - expr: | - sum by (cluster, namespace, job) (rate(cortex_distributor_received_exemplars_total[5m])) - record: cluster_namespace_job:cortex_distributor_received_exemplars:rate5m - - name: 
mimir_exemplars_ingested - rules: - - expr: | - sum by (cluster, namespace, job) (rate(cortex_ingester_ingested_exemplars_total[5m])) - record: cluster_namespace_job:cortex_ingester_ingested_exemplars:rate5m - - name: mimir_exemplars_appended - rules: - - expr: | - sum by (cluster, namespace, job) (rate(cortex_ingester_tsdb_exemplar_exemplars_appended_total[5m])) - record: cluster_namespace_job:cortex_ingester_tsdb_exemplar_exemplars_appended:rate5m - - name: mimir_scaling_rules - rules: - - expr: | - # Convenience rule to get the number of replicas for both a deployment and a statefulset. - # Multi-zone deployments are grouped together removing the "zone-X" suffix. - sum by (cluster, namespace, deployment) ( - label_replace( - kube_deployment_spec_replicas, - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - or - sum by (cluster, namespace, deployment) ( - label_replace(kube_statefulset_replicas, "deployment", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?") - ) - record: cluster_namespace_deployment:actual_replicas:count - - expr: | - ceil( - quantile_over_time(0.99, - sum by (cluster, namespace) ( - cluster_namespace_job:cortex_distributor_received_samples:rate5m - )[24h:] - ) - / 240000 - ) - labels: - deployment: distributor - reason: sample_rate - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - sum by (cluster, namespace) (cortex_limits_overrides{limit_name="ingestion_rate"}) - * 0.59999999999999998 / 240000 - ) - labels: - deployment: distributor - reason: sample_rate_limits - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - quantile_over_time(0.99, - sum by (cluster, namespace) ( - cluster_namespace_job:cortex_distributor_received_samples:rate5m - )[24h:] - ) - * 3 / 80000 - ) - labels: - deployment: ingester - 
reason: sample_rate - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - quantile_over_time(0.99, - sum by(cluster, namespace) ( - cortex_ingester_memory_series - )[24h:] - ) - / 1500000 - ) - labels: - deployment: ingester - reason: active_series - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - sum by (cluster, namespace) (cortex_limits_overrides{limit_name="max_global_series_per_user"}) - * 3 * 0.59999999999999998 / 1500000 - ) - labels: - deployment: ingester - reason: active_series_limits - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - sum by (cluster, namespace) (cortex_limits_overrides{limit_name="ingestion_rate"}) - * 0.59999999999999998 / 80000 - ) - labels: - deployment: ingester - reason: sample_rate_limits - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - (sum by (cluster, namespace) ( - cortex_ingester_tsdb_storage_blocks_bytes{job=~".+/ingester.*"} - ) / 4) - / - avg by (cluster, namespace) ( - memcached_limit_bytes{job=~".+/memcached"} - ) - ) - labels: - deployment: memcached - reason: active_series - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - sum by (cluster, namespace, pod)(rate(container_cpu_usage_seconds_total[1m])), - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - record: cluster_namespace_deployment:container_cpu_usage_seconds_total:sum_rate - - expr: | - # Convenience rule to get the CPU request for both a deployment and a statefulset. - # Multi-zone deployments are grouped together removing the "zone-X" suffix. 
- # This recording rule is made compatible with the breaking changes introduced in kube-state-metrics v2 - # that remove resource metrics, ref: - # - https://github.com/kubernetes/kube-state-metrics/blob/master/CHANGELOG.md#v200-alpha--2020-09-16 - # - https://github.com/kubernetes/kube-state-metrics/pull/1004 - # - # This is the old expression, compatible with kube-state-metrics < v2.0.0, - # where kube_pod_container_resource_requests_cpu_cores was removed: - ( - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - kube_pod_container_resource_requests_cpu_cores, - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - ) - or - # This expression is compatible with kube-state-metrics >= v1.4.0, - # where kube_pod_container_resource_requests was introduced. - ( - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - kube_pod_container_resource_requests{resource="cpu"}, - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - ) - record: cluster_namespace_deployment:kube_pod_container_resource_requests_cpu_cores:sum - - expr: | - # Jobs should be sized to their CPU usage. - # We do this by comparing 99th percentile usage over the last 24hrs to - # their current provisioned #replicas and resource requests. 
- ceil( - cluster_namespace_deployment:actual_replicas:count - * - quantile_over_time(0.99, cluster_namespace_deployment:container_cpu_usage_seconds_total:sum_rate[24h]) - / - cluster_namespace_deployment:kube_pod_container_resource_requests_cpu_cores:sum - ) - labels: - reason: cpu_usage - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - # Convenience rule to get the Memory utilization for both a deployment and a statefulset. - # Multi-zone deployments are grouped together removing the "zone-X" suffix. - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - container_memory_usage_bytes{image!=""}, - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - record: cluster_namespace_deployment:container_memory_usage_bytes:sum - - expr: | - # Convenience rule to get the Memory request for both a deployment and a statefulset. - # Multi-zone deployments are grouped together removing the "zone-X" suffix. 
- # This recording rule is made compatible with the breaking changes introduced in kube-state-metrics v2 - # that remove resource metrics, ref: - # - https://github.com/kubernetes/kube-state-metrics/blob/master/CHANGELOG.md#v200-alpha--2020-09-16 - # - https://github.com/kubernetes/kube-state-metrics/pull/1004 - # - # This is the old expression, compatible with kube-state-metrics < v2.0.0, - # where kube_pod_container_resource_requests_memory_bytes was removed: - ( - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - kube_pod_container_resource_requests_memory_bytes, - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - ) - or - # This expression is compatible with kube-state-metrics >= v1.4.0, - # where kube_pod_container_resource_requests was introduced. - ( - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - kube_pod_container_resource_requests{resource="memory"}, - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - ) - record: cluster_namespace_deployment:kube_pod_container_resource_requests_memory_bytes:sum - - expr: | - # Jobs should be sized to their Memory usage. - # We do this by comparing 99th percentile usage over the last 24hrs to - # their current provisioned #replicas and resource requests. 
- ceil( - cluster_namespace_deployment:actual_replicas:count - * - quantile_over_time(0.99, cluster_namespace_deployment:container_memory_usage_bytes:sum[24h]) - / - cluster_namespace_deployment:kube_pod_container_resource_requests_memory_bytes:sum - ) - labels: - reason: memory_usage - record: cluster_namespace_deployment_reason:required_replicas:count - - name: mimir_alertmanager_rules - rules: - - expr: | - sum by (cluster, job, pod) (cortex_alertmanager_alerts) - record: cluster_job_pod:cortex_alertmanager_alerts:sum - - expr: | - sum by (cluster, job, pod) (cortex_alertmanager_silences) - record: cluster_job_pod:cortex_alertmanager_silences:sum - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_alerts_received_total[5m])) - record: cluster_job:cortex_alertmanager_alerts_received_total:rate5m - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_alerts_invalid_total[5m])) - record: cluster_job:cortex_alertmanager_alerts_invalid_total:rate5m - - expr: | - sum by (cluster, job, integration) (rate(cortex_alertmanager_notifications_total[5m])) - record: cluster_job_integration:cortex_alertmanager_notifications_total:rate5m - - expr: | - sum by (cluster, job, integration) (rate(cortex_alertmanager_notifications_failed_total[5m])) - record: cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_state_replication_total[5m])) - record: cluster_job:cortex_alertmanager_state_replication_total:rate5m - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_state_replication_failed_total[5m])) - record: cluster_job:cortex_alertmanager_state_replication_failed_total:rate5m - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_partial_state_merges_total[5m])) - record: cluster_job:cortex_alertmanager_partial_state_merges_total:rate5m - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_partial_state_merges_failed_total[5m])) - record: 
cluster_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m - - name: mimir_ingester_rules - rules: - - expr: | - sum by(cluster, namespace, pod) (rate(cortex_ingester_ingested_samples_total[1m])) - record: cluster_namespace_pod:cortex_ingester_ingested_samples_total:rate1m + endpoints: + - honorLabels: true + port: http-metrics + scheme: http + selector: + matchLabels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent --- apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor @@ -45730,3 +1624,37 @@ spec: matchLabels: app.kubernetes.io/instance: tempo app.kubernetes.io/name: tempo +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +spec: + rules: + - host: grafana-agent.localhost + http: + paths: + - backend: + service: + name: grafana-agent + port: + number: 80 + path: / + pathType: Prefix + - host: agent.localhost + http: + paths: + - backend: + service: + name: grafana-agent + port: + number: 80 + path: / + pathType: Prefix diff --git a/kubernetes/monolithic-mode/traces/kustomization.yaml b/kubernetes/monolithic-mode/traces/kustomization.yaml index 71f2f95c..4272a450 100644 --- a/kubernetes/monolithic-mode/traces/kustomization.yaml +++ b/kubernetes/monolithic-mode/traces/kustomization.yaml @@ -8,29 +8,19 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: +- ../../common/grafana-agent - tempo + +# optional - ../metrics/mimir -- ../../../monitoring-mixins/agent-flow-mixin/deploy -- ../../../monitoring-mixins/go-runtime-mixin/deploy -- ../../../monitoring-mixins/mimir-mixin/deploy -# - ../../../monitoring-mixins/tempo-mixin/deploy configMapGenerator: - name: agent-config namespace: monitoring-system - options: - 
disableNameSuffixHash: true + behavior: replace files: - configs/config.river -- name: grafana-datasources - namespace: monitoring-system - options: - labels: - grafana_datasource: "1" - files: - - datasources.yaml=configs/grafana-datasources-tempo.yaml - # Update Tempo Monolithic Mode endpoint in gateway - name: nginx-templates namespace: gateway diff --git a/kubernetes/read-write-mode/logs/configs/config.river b/kubernetes/read-write-mode/logs/configs/config.river index f7fb6921..ffb94479 100644 --- a/kubernetes/read-write-mode/logs/configs/config.river +++ b/kubernetes/read-write-mode/logs/configs/config.river @@ -8,6 +8,9 @@ logging { format = "logfmt" } +/******************************************** + * Grafana LGTMP Stack Receiver Provider + ********************************************/ module.file "lgtmp" { filename = coalesce(env("AGENT_CONFIG_FOLDER"), "/etc/agent-modules") + "/lgtmp.river" @@ -34,6 +37,18 @@ module.file "logs_primary" { } } +/******************************************** + * Metrics + ********************************************/ +module.file "metrics_primary" { + filename = coalesce(env("AGENT_CONFIG_FOLDER"), "/etc/agent-modules") + "/metrics.river" + + arguments { + forward_to = [module.file.lgtmp.exports.metrics_receiver] + clustering = true + } +} + /******************************************** * Agent Integrations ********************************************/ diff --git a/kubernetes/read-write-mode/logs/configs/grafana-datasources-loki.yaml b/kubernetes/read-write-mode/logs/configs/grafana-datasources-loki.yaml deleted file mode 100644 index 46f535f3..00000000 --- a/kubernetes/read-write-mode/logs/configs/grafana-datasources-loki.yaml +++ /dev/null @@ -1,17 +0,0 @@ -apiVersion: 1 - -deleteDatasources: -- name: Logs - uid: logs - -datasources: -# Loki for logs -- name: Logs - type: loki - uid: logs - access: proxy - url: http://nginx.gateway.svc.cluster.local:3100 - basicAuth: false - isDefault: true - version: 1 - editable: true diff 
--git a/kubernetes/read-write-mode/logs/k8s-all-in-one.yaml b/kubernetes/read-write-mode/logs/k8s-all-in-one.yaml index 75f25a64..5e8a3510 100644 --- a/kubernetes/read-write-mode/logs/k8s-all-in-one.yaml +++ b/kubernetes/read-write-mode/logs/k8s-all-in-one.yaml @@ -16,6 +16,132 @@ metadata: name: loki namespace: logging-system --- +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/version: 2.11.0 + name: mimir + namespace: monitoring-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent +rules: +- apiGroups: + - "" + - discovery.k8s.io + - networking.k8s.io + resources: + - endpoints + - endpointslices + - ingresses + - nodes + - nodes/proxy + - nodes/metrics + - pods + - services + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - pods + - pods/log + - namespaces + verbs: + - get + - list + - watch +- apiGroups: + - monitoring.grafana.com + resources: + - podlogs + verbs: + - get + - list + - watch +- apiGroups: + - monitoring.coreos.com + resources: + - prometheusrules + verbs: + - get + - list + - watch +- nonResourceURLs: + - /metrics + verbs: + - get +- apiGroups: + - monitoring.coreos.com + resources: + - podmonitors + - servicemonitors + - probes + verbs: + - get + - list + - watch +- 
apiGroups: + - "" + resources: + - events + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - configmaps + - secrets + verbs: + - get + - list + - watch +- apiGroups: + - apps + resources: + - replicasets + verbs: + - get + - list + - watch +- apiGroups: + - extensions + resources: + - replicasets + verbs: + - get + - list + - watch +--- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: @@ -39,6 +165,25 @@ rules: --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: grafana-agent +subjects: +- kind: ServiceAccount + name: grafana-agent + namespace: monitoring-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding metadata: labels: app.kubernetes.io/instance: loki @@ -194,9 +339,11 @@ data: config.river: "/*\nThe following example shows using the default all logs processing module, for\na single tenant and specifying the destination url/credentials via environment\nvariables.\n*/\nlogging {\n\tlevel = coalesce(env(\"AGENT_LOG_LEVEL\"), - \"info\")\n\tformat = \"logfmt\"\n}\n\nmodule.file \"lgtmp\" {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), - \"/etc/agent-modules\") + \"/lgtmp.river\"\n\n\targuments {\n\t\tcluster = - coalesce(env(\"CLUSTER\"), \"k3d-k3s-codelab\")\n\t\tlogs_endpoint = coalesce(env(\"LOGS_ENDPOINT\"), + \"info\")\n\tformat = \"logfmt\"\n}\n\n/********************************************\n + * Grafana LGTMP Stack Receiver Provider\n ********************************************/\nmodule.file + \"lgtmp\" {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), \"/etc/agent-modules\") + + \"/lgtmp.river\"\n\n\targuments {\n\t\tcluster = 
coalesce(env(\"CLUSTER\"), + \"k3d-k3s-codelab\")\n\t\tlogs_endpoint = coalesce(env(\"LOGS_ENDPOINT\"), \"http://nginx.gateway.svc:3100\")\n\t\tmetrics_endpoint = coalesce(env(\"METRICS_ENDPOINT\"), \"http://nginx.gateway.svc:8080\")\n\t\tprofiles_endpoint = coalesce(env(\"PROFILES_ENDPOINT\"), \"http://nginx.gateway.svc:4040\")\n\t\ttraces_endpoint = coalesce(env(\"TRACES_ENDPOINT\"), @@ -206,41 +353,636 @@ data: + \"/logs.river\"\n\n\targuments {\n\t\tforward_to = [module.file.lgtmp.exports.logs_receiver]\n\t\tgit_repo \ = \"https://github.com/qclaogui/agent-modules.git\"\n\t\tgit_rev = \"main\"\n\t\tgit_pull_freq = \"0s\"\n\t}\n}\n\n/********************************************\n - * Agent Integrations\n ********************************************/\nmodule.file - \"agent_integrations\" {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), - \"/etc/agent-modules\") + \"/integrations.river\"\n\n\targuments {\n\t\tname = - \"agent-integrations\"\n\t\tnamespace = \"monitoring-system\"\n\t\tforward_to - = [module.file.lgtmp.exports.metrics_receiver]\n\t}\n}\n" + * Metrics\n ********************************************/\nmodule.file \"metrics_primary\" + {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), \"/etc/agent-modules\") + + \"/metrics.river\"\n\n\targuments {\n\t\tforward_to = [module.file.lgtmp.exports.metrics_receiver]\n\t\tclustering + = true\n\t}\n}\n\n/********************************************\n * Agent Integrations\n + ********************************************/\nmodule.file \"agent_integrations\" + {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), \"/etc/agent-modules\") + + \"/integrations.river\"\n\n\targuments {\n\t\tname = \"agent-integrations\"\n\t\tnamespace + \ = \"monitoring-system\"\n\t\tforward_to = [module.file.lgtmp.exports.metrics_receiver]\n\t}\n}\n" +kind: ConfigMap +metadata: + name: agent-config-6thf5hghkg + namespace: monitoring-system +--- +apiVersion: v1 +data: + MEMCACHED_SECRET_NAME: integrations-memcached + 
MYSQL_SECRET_NAME: integrations-mysql + REDIS_SECRET_NAME: integrations-redis + memcached.river: "/*\nModule: Memcached integrations\nDescription: Wrapper module + to integration Memcached metrics\n*/\nargument \"clustering\" {\n\t// comment + = \"Whether or not clustering should be enabled\"\n\toptional = true\n\tdefault + \ = true\n}\n\nargument \"name\" {\n\t// comment = \"Name of the secret for Memcached\"\n\toptional + = true\n\tdefault = \"integrations-memcached\"\n}\n\nargument \"namespace\" {\n\t// + comment = \"Namespace of the Memcached secret Integrations\"\n\toptional = true\n\tdefault + \ = \"default\"\n}\n\nargument \"instance\" {\n\t// comment = \"Instance of the + Memcached\"\n\toptional = true\n\tdefault = \"primary\"\n}\n\nargument \"forward_to\" + { }\n\nremote.kubernetes.secret \"memcached\" {\n\tname = argument.name.value\n\tnamespace + = argument.namespace.value\n}\n\n// Metrics\nprometheus.exporter.memcached \"integrations_memcached\" + {\n\taddress = nonsensitive(remote.kubernetes.secret.memcached.data[\"memcached-address\"])\n\ttimeout + = \"5s\"\n}\n\nprometheus.scrape \"memcached\" {\n\tclustering {\n\t\tenabled + = argument.clustering.value\n\t}\n\n\tenable_protobuf_negotiation = true\n\tscrape_classic_histograms + \ = true\n\n\ttargets = concat(\n\t\tprometheus.exporter.memcached.integrations_memcached.targets,\n\t)\n\tjob_name + \ = \"integrations/memcached\"\n\tforward_to = [prometheus.relabel.integrations_memcached.receiver]\n}\n\nprometheus.relabel + \"integrations_memcached\" {\n\trule {\n\t\treplacement = argument.instance.value\n\t\ttarget_label + = \"instance\"\n\t}\n\tforward_to = argument.forward_to.value\n}\n" + mysql.river: "/*\nModule: Mysql integrations\nDescription: Wrapper module to integration + mysql metrics\n*/\nargument \"clustering\" {\n\t// comment = \"Whether or not + clustering should be enabled\"\n\toptional = true\n\tdefault = true\n}\n\nargument + \"name\" {\n\t// comment = \"Name of the secret for 
MySQL\"\n\toptional = true\n\tdefault + \ = \"integrations-mysql\"\n}\n\nargument \"namespace\" {\n\t// comment = \"Namespace + of the MySQL secret Integrations\"\n\toptional = true\n\tdefault = \"default\"\n}\n\nargument + \"instance\" {\n\t// comment = \"Instance of the Database\"\n\toptional = true\n\tdefault + \ = \"primary\"\n}\n\nargument \"forward_to\" { }\n\nremote.kubernetes.secret + \"mysql\" {\n\tname = argument.name.value\n\tnamespace = argument.namespace.value\n}\n\n// + Metrics\nprometheus.exporter.mysql \"integrations_mysql\" {\n\tdata_source_name + = nonsensitive(remote.kubernetes.secret.mysql.data[\"mysql-username\"]) + \":\" + + nonsensitive(remote.kubernetes.secret.mysql.data[\"mysql-password\"]) + \"@(\" + + nonsensitive(remote.kubernetes.secret.mysql.data[\"mysql-host\"]) + \")/\"\n}\n\nprometheus.scrape + \"mysql\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\tenable_protobuf_negotiation + = true\n\tscrape_classic_histograms = true\n\n\ttargets = concat(\n\t\tprometheus.exporter.mysql.integrations_mysql.targets,\n\t)\n\tjob_name + \ = \"integrations/mysql\"\n\tforward_to = [prometheus.relabel.integrations_mysql.receiver]\n}\n\nprometheus.relabel + \"integrations_mysql\" {\n\trule {\n\t\treplacement = argument.instance.value\n\t\ttarget_label + = \"instance\"\n\t}\n\tforward_to = argument.forward_to.value\n}\n" + redis.river: "/*\nModule: Redis integrations\nDescription: Wrapper module to integration + Redis metrics\n*/\nargument \"clustering\" {\n\t// comment = \"Whether or not + clustering should be enabled\"\n\toptional = true\n\tdefault = true\n}\n\nargument + \"namespace\" {\n\t// comment = \"Namespace of the Redis secret Integrations\"\n\toptional + = true\n\tdefault = \"default\"\n}\n\nargument \"name\" {\n\t// comment = \"Name + of the secret for Redis\"\n\toptional = true\n\tdefault = \"integrations-redis\"\n}\n\nargument + \"instance\" {\n\t// comment = \"Instance of the Redis\"\n\toptional = true\n\tdefault + \ = 
\"master\"\n}\n\nargument \"forward_to\" { }\n\nremote.kubernetes.secret \"redis\" + {\n\tname = argument.name.value\n\tnamespace = argument.namespace.value\n}\n\n// + Metrics\nprometheus.exporter.redis \"integrations_redis\" {\n\tredis_addr = + nonsensitive(remote.kubernetes.secret.redis.data[\"redis-addr\"])\n\tredis_password + = nonsensitive(remote.kubernetes.secret.redis.data[\"redis-password\"])\n}\n\nprometheus.scrape + \"redis\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\tenable_protobuf_negotiation + = true\n\tscrape_classic_histograms = true\n\n\ttargets = concat(\n\t\tprometheus.exporter.redis.integrations_redis.targets,\n\t)\n\tjob_name + \ = \"integrations/redis\"\n\tforward_to = [prometheus.relabel.integrations_redis.receiver]\n}\n\nprometheus.relabel + \"integrations_redis\" {\n\trule {\n\t\treplacement = argument.instance.value\n\t\ttarget_label + = \"instance\"\n\t}\n\tforward_to = argument.forward_to.value\n}\n" kind: ConfigMap metadata: - name: agent-config + name: agent-integrations namespace: monitoring-system --- apiVersion: v1 data: - datasources.yaml: | - apiVersion: 1 - - deleteDatasources: - - name: Logs - uid: logs - - datasources: - # Loki for logs - - name: Logs - type: loki - uid: logs - access: proxy - url: http://nginx.gateway.svc.cluster.local:3100 - basicAuth: false - isDefault: true - version: 1 - editable: true + integrations.river: "/*\nModule: Agent integrations\nDescription: Wrapper module + to include auto loading integrations\n*/\nargument \"name\" {\n\t// comment = + \"Name of the integrations config\"\n\toptional = true\n\tdefault = \"agent-integrations\"\n}\n\nargument + \"namespace\" {\n\t// comment = \"Namespace of the integrations config\"\n\toptional + = true\n\tdefault = \"default\"\n}\n\nargument \"forward_to\" { }\n\nremote.kubernetes.configmap + \"integrations\" {\n\tname = argument.name.value\n\tnamespace = argument.namespace.value\n}\n\n/********************************************\n + * 
Integrations Mysql\n ********************************************/\nmodule.string + \"mysql\" {\n\tcontent = remote.kubernetes.configmap.integrations.data[\"mysql.river\"]\n\n\targuments + {\n\t\tnamespace = argument.namespace.value\n\t\tname = remote.kubernetes.configmap.integrations.data[\"MYSQL_SECRET_NAME\"]\n\t\tinstance + \ = \"primary\"\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n\n/********************************************\n + * Integrations Memcached\n ********************************************/\nmodule.string + \"memcached\" {\n\tcontent = remote.kubernetes.configmap.integrations.data[\"memcached.river\"]\n\n\targuments + {\n\t\tnamespace = argument.namespace.value\n\t\tname = remote.kubernetes.configmap.integrations.data[\"MEMCACHED_SECRET_NAME\"]\n\t\tinstance + \ = \"primary\"\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n\n/********************************************\n + * Integrations Redis\n ********************************************/\nmodule.string + \"redis\" {\n\tcontent = remote.kubernetes.configmap.integrations.data[\"redis.river\"]\n\n\targuments + {\n\t\tnamespace = argument.namespace.value\n\t\tname = remote.kubernetes.configmap.integrations.data[\"REDIS_SECRET_NAME\"]\n\t\tinstance + \ = \"master\"\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n" + lgtmp.river: "/********************************************\n * ARGUMENTS\n ********************************************/\nargument + \"cluster\" {\n\toptional = true\n\tdefault = \"k3d-k3s-codelab\"\n}\n\nargument + \"tenant\" {\n\toptional = true\n\tdefault = \"anonymous\"\n}\n\nargument \"metrics_endpoint\" + {\n\toptional = true\n\tdefault = \"http://mimir:8080\"\n\t//comment = \"Where + to send collected metrics.\"\n}\n\nargument \"logs_endpoint\" {\n\toptional = + true\n\tdefault = \"http://loki:3100\"\n\t//comment = \"Where to send collected + logs.\"\n}\n\nargument \"traces_endpoint\" {\n\toptional = true\n\tdefault = + \"tempo:4317\"\n\t//comment = \"Where 
to send collected traces.\"\n}\n\nargument + \"profiles_endpoint\" {\n\toptional = true\n\tdefault = \"http://pyroscope:4040\"\n\t//comment + \ = \"Where to send collected profiles.\"\n}\n\n/********************************************\n + * EXPORTS\n ********************************************/\n\nexport \"metrics_receiver\" + {\n\tvalue = prometheus.remote_write.mimir.receiver\n}\n\nexport \"logs_receiver\" + {\n\tvalue = loki.write.loki.receiver\n}\n\nexport \"traces_receiver\" {\n\tvalue + = otelcol.exporter.otlp.tempo.input\n}\n\nexport \"profiles_receiver\" {\n\tvalue + = pyroscope.write.pyroscope.receiver\n}\n\n/********************************************\n + * Endpoints\n ********************************************/\n\n// Metrics\nprometheus.remote_write + \"mimir\" {\n\tendpoint {\n\t\turl = argument.metrics_endpoint.value + + \"/api/v1/push\"\n\t\tsend_native_histograms = true\n\t}\n\n\texternal_labels + = {\n\t\t\"scraped_by\" = \"grafana-agent\",\n\t\t\"cluster\" = argument.cluster.value,\n\t}\n}\n\n// + Logs\nloki.write \"loki\" {\n\tendpoint {\n\t\turl = argument.logs_endpoint.value + + \"/loki/api/v1/push\"\n\t\ttenant_id = argument.tenant.value\n\t}\n\n\texternal_labels + = {\n\t\t\"scraped_by\" = \"grafana-agent\",\n\t\t\"cluster\" = argument.cluster.value,\n\t}\n}\n\n// + Traces\notelcol.exporter.otlp \"tempo\" {\n\tclient {\n\t\tendpoint = argument.traces_endpoint.value\n\n\t\ttls + {\n\t\t\tinsecure = true\n\t\t\tinsecure_skip_verify = true\n\t\t}\n\t}\n}\n\n// + Profiles\npyroscope.write \"pyroscope\" {\n\tendpoint {\n\t\turl = argument.profiles_endpoint.value\n\t}\n\n\texternal_labels + = {\n\t\t\"scraped_by\" = \"grafana-agent\",\n\t\t\"cluster\" = argument.cluster.value,\n\t}\n}\n" + logs.river: "/*\nModule: logs\nDescription: Wrapper module to include all kubernetes + logging modules and use cri parsing\n*/\nargument \"forward_to\" {\n\t// comment + = \"Must be a list(LogsReceiver) where collected logs should be forwarded 
to\"\n\toptional + = false\n}\n\nargument \"tenant\" {\n\t// comment = \"The tenant to filter logs + to. This does not have to be the tenantId, this is the value to look for in the + logs.agent.grafana.com/tenant annotation, and this can be a regex.\"\n\toptional + = true\n\tdefault = \".*\"\n}\n\nargument \"keep_labels\" {\n\t// comment = \"List + of labels to keep before the log message is written to Loki\"\n\toptional = true\n\tdefault + \ = [\n\t\t\"app\",\n\t\t\"cluster\",\n\t\t\"component\",\n\t\t\"container\",\n\t\t\"deployment\",\n\t\t\"env\",\n\t\t\"filename\",\n\t\t\"instance\",\n\t\t\"job\",\n\t\t\"level\",\n\t\t\"log_type\",\n\t\t\"namespace\",\n\t\t\"region\",\n\t\t\"service\",\n\t\t\"squad\",\n\t\t\"team\",\n\t]\n}\n\nargument + \"git_repo\" {\n\toptional = true\n\tdefault = coalesce(env(\"GIT_REPO\"), \"https://github.com/grafana/agent-modules.git\")\n}\n\nargument + \"git_rev\" {\n\toptional = true\n\tdefault = coalesce(env(\"GIT_REV\"), env(\"GIT_REVISION\"), + env(\"GIT_BRANCH\"), \"main\")\n}\n\nargument \"git_pull_freq\" {\n\t// comment + = \"How often to pull the git repo, the default is 0s which means never pull\"\n\toptional + = true\n\tdefault = \"0s\"\n}\n\nmodule.git \"log_targets\" {\n\trepository = + argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/targets/logs-from-worker.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.log_formats_all.exports.process.receiver]\n\t\ttenant + \ = argument.tenant.value\n\t\tgit_repo = argument.git_repo.value\n\t\tgit_rev + \ = argument.git_rev.value\n\t\tgit_pull_freq = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git + \"log_formats_all\" {\n\trepository = argument.git_repo.value\n\trevision + \ = argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/log-formats/all.river\"\n\n\targuments + {\n\t\tforward_to = 
[module.git.log_level_default.exports.process.receiver]\n\t\tgit_repo + \ = argument.git_repo.value\n\t\tgit_rev = argument.git_rev.value\n\t\tgit_pull_freq + = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git \"log_level_default\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/labels/log-level.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.label_normalize_filename.exports.process.receiver]\n\t}\n}\n\nmodule.git + \"label_normalize_filename\" {\n\trepository = argument.git_repo.value\n\trevision + \ = argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/labels/normalize-filename.river\"\n\n\targuments + {\n\t\t// here we fork, one branch goes to the log level module, the other goes + to the metrics module\n\t\t// this is because we need to reduce the labels on + the pre-metrics but they are still necessary in\n\t\t// downstream modules\n\t\tforward_to + = [\n\t\t\tmodule.git.pre_process_metrics.exports.process.receiver,\n\t\t\tmodule.git.drop_levels.exports.process.receiver,\n\t\t]\n\t}\n}\n\nmodule.git + \"pre_process_metrics\" {\n\trepository = argument.git_repo.value\n\trevision + \ = argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/metrics/pre-process-bytes-lines.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.drop_levels.exports.process.receiver]\n\t\tkeep_labels + = argument.keep_labels.value\n\t}\n}\n\nmodule.git \"drop_levels\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/drops/levels.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.scrub_all.exports.process.receiver]\n\t\tgit_repo + \ = argument.git_repo.value\n\t\tgit_rev = 
argument.git_rev.value\n\t\tgit_pull_freq + = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git \"scrub_all\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/scrubs/all.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.embed_pod.exports.process.receiver]\n\t\tgit_repo + \ = argument.git_repo.value\n\t\tgit_rev = argument.git_rev.value\n\t\tgit_pull_freq + = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git \"embed_pod\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/embed/pod.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.mask_all.exports.process.receiver]\n\t}\n}\n\nmodule.git + \"mask_all\" {\n\trepository = argument.git_repo.value\n\trevision = + argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/masks/all.river\"\n\n\targuments {\n\t\tforward_to + \ = [module.git.label_keep.exports.process.receiver]\n\t\tgit_repo = argument.git_repo.value\n\t\tgit_rev + \ = argument.git_rev.value\n\t\tgit_pull_freq = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git + \"label_keep\" {\n\trepository = argument.git_repo.value\n\trevision = + argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/labels/keep-labels.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.post_process_metrics.exports.process.receiver]\n\t\tkeep_labels + = argument.keep_labels.value\n\t}\n}\n\nmodule.git \"post_process_metrics\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/metrics/post-process-bytes-lines.river\"\n\n\targuments + {\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n" + 
metrics.river: "/*\nModule: metrics-all\nDescription: Wrapper module to include + all kubernetes metric modules and use cri parsing\n*/\nargument \"forward_to\" + {\n\t// comment = \"Must be a list(MetricssReceiver) where collected logs should + be forwarded to\"\n\toptional = false\n}\n\nargument \"clustering\" {\n\t// comment + = \"Whether or not clustering should be enabled\"\n\toptional = true\n\tdefault + \ = false\n}\n\n/********************************************\n * Kubernetes Auto + Scrape ServiceMonitor\n ********************************************/\nprometheus.operator.servicemonitors + \"auto_scrape_servicemonitors\" {\n\tforward_to = argument.forward_to.value\n\n\tclustering + {\n\t\tenabled = argument.clustering.value\n\t}\n}\n\n/********************************************\n + * Kubernetes Auto Scrape PodMonitors\n ********************************************/\nprometheus.operator.podmonitors + \"auto_scrape_podmonitors\" {\n\tforward_to = argument.forward_to.value\n\n\tclustering + {\n\t\tenabled = argument.clustering.value\n\t}\n\n\tselector {\n\t\tmatch_expression + {\n\t\t\tkey = \"team\"\n\t\t\toperator = \"In\"\n\t\t\tvalues = [\"team-infra\"]\n\t\t}\n\t}\n}\n\n/********************************************\n + * Kubernetes Prometheus Rules To Mimir\n ********************************************/\nmimir.rules.kubernetes + \"prometheus_rules_to_mimir\" {\n\taddress = coalesce(env(\"METRICS_ENDPOINT\"), + \"http://nginx.gateway.svc:8080\")\n\ttenant_id = \"anonymous\"\n}\n" + profiles.river: "/*\nModule: profiles\nDescription: Wrapper module to include all + kubernetes profile modules and use cri parsing\n*/\nargument \"forward_to\" {\n\t// + comment = \"Must be a list(ProfilessReceiver) where collected logs should be forwarded + to\"\n\toptional = false\n}\n\nargument \"clustering\" {\n\t// comment = \"Whether + or not clustering should be enabled\"\n\toptional = true\n\tdefault = false\n}\n\ndiscovery.kubernetes + \"pyroscope_kubernetes\" 
{\n\trole = \"pod\"\n}\n\n// The default scrape config + allows to define annotations based scraping.\n//\n// For example the following + annotations:\n//\n// ```\n// profiles.grafana.com/memory.scrape: \"true\"\n// + profiles.grafana.com/memory.port: \"8080\"\n// profiles.grafana.com/cpu.scrape: + \"true\"\n// profiles.grafana.com/cpu.port: \"8080\"\n// profiles.grafana.com/goroutine.scrape: + \"true\"\n// profiles.grafana.com/goroutine.port: \"8080\"\n// ```\n//\n// will + scrape the `memory`, `cpu` and `goroutine` profiles from the `8080` port of the + pod.\n//\n// For more information see https://grafana.com/docs/phlare/latest/operators-guide/deploy-kubernetes/#optional-scrape-your-own-workloads-profiles\ndiscovery.relabel + \"kubernetes_pods\" {\n\ttargets = concat(discovery.kubernetes.pyroscope_kubernetes.targets)\n\n\trule + {\n\t\taction = \"drop\"\n\t\tsource_labels = [\"__meta_kubernetes_pod_phase\"]\n\t\tregex + \ = \"Pending|Succeeded|Failed|Completed\"\n\t}\n\n\trule {\n\t\taction + = \"labelmap\"\n\t\tregex = \"__meta_kubernetes_pod_label_(.+)\"\n\t}\n\n\trule + {\n\t\taction = \"replace\"\n\t\tsource_labels = [\"__meta_kubernetes_namespace\"]\n\t\ttarget_label + \ = \"namespace\"\n\t}\n\n\trule {\n\t\taction = \"replace\"\n\t\tsource_labels + = [\"__meta_kubernetes_pod_name\"]\n\t\ttarget_label = \"pod\"\n\t}\n\n\trule + {\n\t\taction = \"replace\"\n\t\tsource_labels = [\"__meta_kubernetes_pod_container_name\"]\n\t\ttarget_label + \ = \"container\"\n\t}\n}\n\ndiscovery.relabel \"kubernetes_pods_memory_default_name\" + {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = 
[\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_memory_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port\"]\n\t\taction + \ = 
\"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Memory\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_memory\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_memory_default_name.output, discovery.relabel.kubernetes_pods_memory_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = true\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_cpu_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port\"]\n\t\taction + \ = 
\"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_cpu_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape CPU\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_cpu\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_cpu_default_name.output, discovery.relabel.kubernetes_pods_cpu_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory 
{\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_goroutine_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_goroutine_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = 
\"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Goroutine\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_goroutine\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_goroutine_default_name.output, + discovery.relabel.kubernetes_pods_goroutine_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_block_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = 
[\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_block_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = 
[\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Block\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_block\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_block_default_name.output, discovery.relabel.kubernetes_pods_block_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_mutex_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = 
[\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_mutex_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Mutex\n 
********************************************/\npyroscope.scrape + \"pyroscope_scrape_mutex\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_mutex_default_name.output, discovery.relabel.kubernetes_pods_mutex_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_fgprof_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_fgprof_custom_name\" {\n\ttargets = 
concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Fgprof\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_fgprof\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_fgprof_default_name.output, discovery.relabel.kubernetes_pods_fgprof_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block 
{\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = true\n\t\t}\n\t}\n}\n" + traces.river: "/*\nModule: traces\n*/\n\n/********************************************\n + * ARGUMENTS\n ********************************************/\nargument \"traces_forward_to\" + {\n\toptional = false\n}\n\nargument \"logs_forward_to\" {\n\toptional = false\n}\n\nargument + \"metrics_forward_to\" {\n\toptional = false\n}\n\nargument \"cluster\" {\n\toptional + = true\n\tdefault = \"k3d-k3s-codelab\"\n}\n\nargument \"otlp_http_endpoint\" + {\n\toptional = true\n\tdefault = \"0.0.0.0:4318\"\n}\n\nargument \"otlp_grpc_endpoint\" + {\n\toptional = true\n\tdefault = \"0.0.0.0:4317\"\n}\n\n/********************************************\n + * EXPORTS\n ********************************************/\nexport \"agent_traces_input\" + {\n\tvalue = otelcol.processor.batch.default.input\n}\n\n/********************************************\n + * Jaeger for Metrics Logs Traces\n ********************************************/\n\notelcol.receiver.jaeger + \"default\" {\n\tprotocols {\n\t\tgrpc {\n\t\t\tendpoint = \"0.0.0.0:14250\"\n\t\t}\n\n\t\tthrift_http + {\n\t\t\tendpoint = \"0.0.0.0:14268\"\n\t\t}\n\n\t\tthrift_binary {\n\t\t\tendpoint + = \"0.0.0.0:6832\"\n\t\t}\n\n\t\tthrift_compact {\n\t\t\tendpoint = \"0.0.0.0:6831\"\n\t\t}\n\t}\n\n\toutput + {\n\t\tmetrics = [otelcol.processor.batch.default.input]\n\t\tlogs = [otelcol.processor.resourcedetection.default.input]\n\t\ttraces + \ = [otelcol.processor.resourcedetection.default.input]\n\t}\n}\n\n/********************************************\n + * Otelcol for Metrics Logs Traces\n ********************************************/\n// + https://grafana.com/docs/agent/latest/flow/reference/components/otelcol.receiver.otlp/\notelcol.receiver.otlp + \"default\" {\n\tgrpc {\n\t\tendpoint = argument.otlp_grpc_endpoint.value\n\t}\n\n\thttp + {\n\t\tendpoint = 
argument.otlp_http_endpoint.value\n\t}\n\n\toutput {\n\t\tmetrics + = [otelcol.processor.batch.default.input]\n\t\tlogs = [otelcol.processor.resourcedetection.default.input]\n\t\ttraces + \ = [\n\t\t\totelcol.processor.resourcedetection.default.input,\n\t\t\totelcol.connector.spanlogs.autologging.input,\n\t\t]\n\t}\n}\n\notelcol.processor.resourcedetection + \"default\" {\n\tdetectors = [\"env\"]\n\n\toutput {\n\t\tlogs = [otelcol.processor.k8sattributes.default.input]\n\t\ttraces + = [otelcol.processor.k8sattributes.default.input]\n\t}\n}\n\notelcol.processor.k8sattributes + \"default\" {\n\textract {\n\t\tmetadata = [\n\t\t\t\"k8s.namespace.name\",\n\t\t\t\"k8s.pod.name\",\n\t\t\t\"k8s.deployment.name\",\n\t\t\t\"k8s.statefulset.name\",\n\t\t\t\"k8s.daemonset.name\",\n\t\t\t\"k8s.cronjob.name\",\n\t\t\t\"k8s.job.name\",\n\t\t\t\"k8s.node.name\",\n\t\t\t\"k8s.pod.uid\",\n\t\t\t\"k8s.pod.start_time\",\n\t\t]\n\t}\n\n\tpod_association + {\n\t\tsource {\n\t\t\tfrom = \"connection\"\n\t\t}\n\t}\n\n\toutput {\n\t\tlogs + \ = [otelcol.processor.transform.add_resource_attributes.input]\n\t\ttraces = + [otelcol.processor.transform.add_resource_attributes.input]\n\t}\n}\n\notelcol.processor.transform + \"add_resource_attributes\" {\n\terror_mode = \"ignore\"\n\n\tlog_statements {\n\t\tcontext + \ = \"resource\"\n\t\tstatements = [\n\t\t\t`set(attributes[\"pod\"], attributes[\"k8s.pod.name\"])`,\n\t\t\t`set(attributes[\"namespace\"], + attributes[\"k8s.namespace.name\"])`,\n\t\t\t`set(attributes[\"loki.resource.labels\"], + \"pod, namespace, cluster, job\")`,\n\t\t\t`set(attributes[\"k8s.cluster.name\"], + \"k3d-k3s-codelab\") where attributes[\"k8s.cluster.name\"] == nil`,\n\t\t]\n\t}\n\n\ttrace_statements + {\n\t\tcontext = \"resource\"\n\t\tstatements = [\n\t\t\t`set(attributes[\"k8s.cluster.name\"], + \"k3d-k3s-codelab\") where attributes[\"k8s.cluster.name\"] == nil`,\n\t\t]\n\t}\n\n\toutput + {\n\t\tlogs = [otelcol.processor.filter.default.input]\n\t\ttraces = 
[otelcol.processor.filter.default.input]\n\t}\n}\n\notelcol.processor.filter + \"default\" {\n\terror_mode = \"ignore\"\n\n\toutput {\n\t\tlogs = [otelcol.processor.batch.default.input]\n\t\ttraces + = [otelcol.processor.batch.default.input]\n\t}\n}\n\notelcol.processor.batch \"default\" + {\n\tsend_batch_size = 16384\n\tsend_batch_max_size = 0\n\ttimeout = + \"5s\"\n\n\toutput {\n\t\tmetrics = [otelcol.processor.memory_limiter.default.input]\n\t\tlogs + \ = [otelcol.processor.memory_limiter.default.input]\n\t\ttraces = [otelcol.processor.memory_limiter.default.input]\n\t}\n}\n\notelcol.processor.memory_limiter + \"default\" {\n\tcheck_interval = \"1s\"\n\tlimit_percentage = 50\n\tspike_limit_percentage + = 30\n\n\toutput {\n\t\tmetrics = [otelcol.exporter.prometheus.tracesmetrics.input]\n\t\tlogs + \ = [otelcol.exporter.loki.traceslogs.input]\n\t\ttraces = argument.traces_forward_to.value\n\t}\n}\n\notelcol.exporter.prometheus + \"tracesmetrics\" {\n\tforward_to = argument.metrics_forward_to.value\n}\n\notelcol.exporter.loki + \"traceslogs\" {\n\tforward_to = [loki.process.traceslogs.receiver]\n}\n\n// The + OpenTelemetry spanlog connector processes incoming trace spans and extracts data + from them ready\n// for logging.\notelcol.connector.spanlogs \"autologging\" {\n\t// + We only want to output a line for each root span (ie. 
every single trace), and + not for every\n\t// process or span (outputting a line for every span would be + extremely verbose).\n\tspans = false\n\troots = true\n\tprocesses = false\n\n\t// + We want to ensure that the following three span attributes are included in the + log line, if present.\n\tspan_attributes = [\n\t\t\"http.method\",\n\t\t\"http.target\",\n\t\t\"http.status_code\",\n\t]\n\n\t// + Overrides the default key in the log line to be `traceId`, which is then used + by Grafana to\n\t// identify the trace ID for correlation with the Tempo datasource.\n\toverrides + {\n\t\ttrace_id_key = \"traceId\"\n\t}\n\n\t// Send to the OpenTelemetry Loki + exporter.\n\toutput {\n\t\tlogs = [otelcol.exporter.loki.autologging.input]\n\t}\n}\n\n// + Simply forwards the incoming OpenTelemetry log format out as a Loki log.\n// We + need this stage to ensure we can then process the logline as a Loki object.\notelcol.exporter.loki + \"autologging\" {\n\tforward_to = [loki.process.autologging.receiver]\n}\n\n// + The Loki processor allows us to accept a correctly formatted Loki log and mutate + it into\n// a set of fields for output.\nloki.process \"autologging\" {\n\t// + The JSON stage simply extracts the `body` (the actual logline) from the Loki log, + ignoring\n\t// all other fields.\n\tstage.json {\n\t\texpressions = {\"body\" + = \"\"}\n\t}\n\t// The output stage takes the body (the main logline) and uses + this as the source for the output\n\t// logline. 
In this case, it essentially + turns it into logfmt.\n\tstage.output {\n\t\tsource = \"body\"\n\t}\n\n\tforward_to + = [loki.process.traceslogs.receiver]\n}\n\nloki.process \"traceslogs\" {\n\tstage.tenant + {\n\t\tvalue = \"anonymous\"\n\t}\n\n\tforward_to = argument.logs_forward_to.value\n}\n" +kind: ConfigMap +metadata: + name: agent-modules-cf8t5bf7t9 + namespace: monitoring-system +--- +apiVersion: v1 +data: + alertmanager_fallback_config.yaml: | + route: + group_wait: 0s + receiver: empty-receiver + + receivers: + # In this example we're not going to send any notification out of Alertmanager. + - name: 'empty-receiver' + mimir.yaml: | + # Do not use this configuration in production. + # It is for demonstration purposes only. + multitenancy_enabled: false + + # -usage-stats.enabled=false + usage_stats: + enabled: false + + server: + http_listen_port: 8080 + grpc_listen_port: 9095 + log_level: info + + # https://grafana.com/docs/mimir/latest/references/configuration-parameters/#use-environment-variables-in-the-configuration + common: + storage: + backend: s3 + s3: + endpoint: ${MIMIR_S3_ENDPOINT:minio.minio-system.svc:443} + access_key_id: ${MIMIR_S3_ACCESS_KEY_ID:lgtmp} + secret_access_key: ${MIMIR_S3_SECRET_ACCESS_KEY:supersecret} + insecure: ${MIMIR_S3_INSECURE:false} + http: + insecure_skip_verify: true + + alertmanager: + data_dir: /data/alertmanager + enable_api: true + external_url: /alertmanager + fallback_config_file: /etc/mimir/alertmanager_fallback_config.yaml + alertmanager_storage: + s3: + bucket_name: mimir-alertmanager + + + memberlist: + join_members: [ mimir-memberlist:7946 ] + + ingester: + ring: + replication_factor: 1 + + store_gateway: + sharding_ring: + replication_factor: 1 + + + blocks_storage: + s3: + bucket_name: mimir-blocks + tsdb: + dir: /data/ingester + ship_interval: 1m + block_ranges_period: [ 2h ] + retention_period: 3h + bucket_store: + index_cache: + backend: memcached + memcached: + addresses: 
dns+memcached.memcached-system.svc:11211 + + chunks_cache: + backend: memcached + memcached: + addresses: dns+memcached.memcached-system.svc:11211 + + metadata_cache: + backend: memcached + memcached: + addresses: dns+memcached.memcached-system.svc:11211 + + ruler: + rule_path: /data/rules + enable_api: true + alertmanager_url: http://localhost:8080/alertmanager + ruler_storage: + s3: + bucket_name: mimir-ruler + cache: + backend: memcached + memcached: + addresses: dns+memcached.memcached-system.svc:11211 + + compactor: + compaction_interval: 30s + data_dir: /data/mimir-compactor + cleanup_interval: 1m + tenant_cleanup_delay: 1m + + limits: + native_histograms_ingestion_enabled: true + + overrides_exporter: + ring: + enabled: true + wait_stability_min_duration: 30s + + runtime_config: + file: /etc/mimir/runtime.yaml + runtime.yaml: |- + # This file can be used to set overrides or other runtime config. + ingester_limits: # limits that each ingester replica enforces + max_ingestion_rate: 20000 + max_series: 1500000 + max_tenants: 1000 + max_inflight_push_requests: 30000 + + distributor_limits: # limits that each distributor replica enforces + max_ingestion_rate: 20000 + max_inflight_push_requests: 30000 + max_inflight_push_requests_bytes: 50000000 + + overrides: + anonymous: # limits for anonymous that the whole cluster enforces + # ingestion_tenant_shard_size: 9 + max_global_series_per_user: 1500000 + max_fetched_series_per_query: 100000 + native_histograms_ingestion_enabled: true + ruler_max_rules_per_rule_group: 50 kind: ConfigMap metadata: labels: - grafana_datasource: "1" - name: grafana-datasources-9tgbk45h65 + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/version: 2.11.0 + name: mimir-config-958c4gm5k9 namespace: monitoring-system --- apiVersion: v1 @@ -253,6 +995,51 @@ metadata: type: Opaque --- apiVersion: v1 +data: + 
memcached-address: bWVtY2FjaGVkLm1lbWNhY2hlZC1zeXN0ZW0uc3ZjLmNsdXN0ZXIubG9jYWw6MTEyMTE= +kind: Secret +metadata: + name: integrations-memcached + namespace: monitoring-system +type: Opaque +--- +apiVersion: v1 +data: + mysql-host: bXlzcWwubXlzcWwtc3lzdGVtLnN2Yy5jbHVzdGVyLmxvY2Fs + mysql-password: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= + mysql-username: bGd0bXA= +kind: Secret +metadata: + name: integrations-mysql + namespace: monitoring-system +type: Opaque +--- +apiVersion: v1 +data: + redis-addr: cmVkaXMtbWFzdGVyLnJlZGlzLXN5c3RlbS5zdmMuY2x1c3Rlci5sb2NhbDo2Mzc5 + redis-password: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= +kind: Secret +metadata: + name: integrations-redis + namespace: monitoring-system +type: Opaque +--- +apiVersion: v1 +data: + MIMIR_S3_SECRET_ACCESS_KEY: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= +kind: Secret +metadata: + labels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/version: 2.11.0 + name: mimir-env-92ddctt858 + namespace: monitoring-system +type: Opaque +--- +apiVersion: v1 kind: Service metadata: labels: @@ -474,6 +1261,133 @@ spec: app.kubernetes.io/name: loki type: ClusterIP --- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +spec: + internalTrafficPolicy: Cluster + ports: + - name: http-metrics + port: 80 + protocol: TCP + targetPort: 80 + - name: grpc-otlp + port: 4317 + protocol: TCP + targetPort: 4317 + - name: http-otlp + port: 4318 + protocol: TCP + targetPort: 4318 + - name: zipkin + port: 9411 + protocol: TCP + targetPort: 9411 + - name: jaeger-compact + port: 6831 + protocol: UDP + targetPort: 6831 + selector: + 
app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + type: ClusterIP +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent-cluster + namespace: monitoring-system +spec: + clusterIP: None + ports: + - name: http + port: 80 + protocol: TCP + targetPort: 80 + - name: grpc-otlp + port: 4317 + protocol: TCP + targetPort: 4317 + - name: http-otlp + port: 4318 + protocol: TCP + targetPort: 4318 + - name: zipkin + port: 9411 + protocol: TCP + targetPort: 9411 + - name: jaeger-compact + port: 6831 + protocol: UDP + targetPort: 6831 + selector: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + type: ClusterIP +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/version: 2.11.0 + name: mimir + namespace: monitoring-system +spec: + ports: + - name: http-metrics + port: 8080 + - name: grpc-distribut + port: 9095 + selector: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/name: mimir +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/version: 2.11.0 + prometheus.io/service-monitor: "false" + name: mimir-memberlist + namespace: monitoring-system +spec: + clusterIP: None + ports: + - appProtocol: tcp + name: tcp-gossip-ring + port: 7946 + protocol: TCP + targetPort: 7946 + publishNotReadyAddresses: true + selector: + app.kubernetes.io/component: mimir + 
app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/name: mimir + app.kubernetes.io/part-of: memberlist +--- apiVersion: apps/v1 kind: Deployment metadata: @@ -587,6 +1501,92 @@ spec: name: runtime-config --- apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/part-of: memberlist + app.kubernetes.io/version: 2.11.0 + name: mimir + namespace: monitoring-system +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/name: mimir + app.kubernetes.io/part-of: memberlist + template: + metadata: + annotations: + logs.agent.grafana.com/scrape: "true" + logs.agent.grafana.com/scrub-level: info + profiles.grafana.com/cpu.port_name: http-metrics + profiles.grafana.com/cpu.scrape: "false" + profiles.grafana.com/goroutine.port_name: http-metrics + profiles.grafana.com/goroutine.scrape: "false" + profiles.grafana.com/memory.port_name: http-metrics + profiles.grafana.com/memory.scrape: "false" + pyroscope.io/service_name: mimir + labels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/name: mimir + app.kubernetes.io/part-of: memberlist + spec: + containers: + - args: + - -target=all + - -config.expand-env=true + - -config.file=/etc/mimir/mimir.yaml + - -memberlist.bind-addr=$(POD_IP) + env: + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + envFrom: + - secretRef: + name: mimir-env-92ddctt858 + image: docker.io/grafana/mimir:2.11.0 + imagePullPolicy: IfNotPresent + name: mimir + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc-distribut + - containerPort: 7946 + name: http-memberlist + readinessProbe: + httpGet: + path: /ready + port: http-metrics + 
resources: + limits: + cpu: 999m + memory: 1Gi + requests: + cpu: 10m + memory: 55Mi + volumeMounts: + - mountPath: /etc/mimir + name: config + - mountPath: /data + name: storage + terminationGracePeriodSeconds: 60 + volumes: + - configMap: + name: mimir-config-958c4gm5k9 + name: config + - emptyDir: {} + name: storage +--- +apiVersion: apps/v1 kind: StatefulSet metadata: labels: @@ -890,3 +1890,214 @@ spec: app.kubernetes.io/component: write app.kubernetes.io/instance: loki app.kubernetes.io/name: loki +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +spec: + minReadySeconds: 10 + selector: + matchLabels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + template: + metadata: + annotations: + kubectl.kubernetes.io/default-container: grafana-agent + logs.agent.grafana.com/scrape: "true" + logs.agent.grafana.com/scrub-level: debug + profiles.grafana.com/cpu.port_name: http-metrics + profiles.grafana.com/cpu.scrape: "false" + profiles.grafana.com/goroutine.port_name: http-metrics + profiles.grafana.com/goroutine.scrape: "false" + profiles.grafana.com/memory.port_name: http-metrics + profiles.grafana.com/memory.scrape: "false" + pyroscope.io/service_name: grafana-agent + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + spec: + containers: + - args: + - run + - /etc/agent/config.river + - --storage.path=/tmp/agent + - --server.http.listen-addr=0.0.0.0:80 + - --server.http.ui-path-prefix=/ + - --disable-reporting + - --cluster.enabled=true + - --cluster.join-addresses=grafana-agent-cluster + env: + - name: AGENT_MODE + value: flow + - name: AGENT_DEPLOY_MODE + value: helm + - name: HOSTNAME + valueFrom: + fieldRef: + fieldPath: 
spec.nodeName + envFrom: + - secretRef: + name: agent-env + optional: true + image: docker.io/grafana/agent:v0.40.3 + imagePullPolicy: IfNotPresent + name: grafana-agent + ports: + - containerPort: 80 + name: http-metrics + - containerPort: 4317 + name: grpc-otlp + protocol: TCP + - containerPort: 4318 + name: http-otlp + protocol: TCP + - containerPort: 9411 + name: zipkin + protocol: TCP + - containerPort: 6831 + name: jaeger-compact + protocol: UDP + readinessProbe: + httpGet: + path: /-/ready + port: 80 + scheme: HTTP + initialDelaySeconds: 10 + timeoutSeconds: 1 + volumeMounts: + - mountPath: /etc/agent + name: config + - mountPath: /var/log + name: varlog + readOnly: true + - mountPath: /etc/agent-modules + name: agent-modules + - args: + - --volume-dir=/etc/agent + - --webhook-url=http://localhost:80/-/reload + image: ghcr.io/jimmidyson/configmap-reload:v0.12.0 + name: config-reloader + resources: + requests: + cpu: 1m + memory: 5Mi + volumeMounts: + - mountPath: /etc/agent + name: config + dnsPolicy: ClusterFirst + nodeSelector: + kubernetes.io/os: linux + serviceAccountName: grafana-agent + volumes: + - configMap: + name: agent-config-6thf5hghkg + name: config + - hostPath: + path: /var/log + name: varlog + - configMap: + name: agent-modules-cf8t5bf7t9 + name: agent-modules + updateStrategy: + rollingUpdate: + maxSurge: 0 + maxUnavailable: 2 + type: RollingUpdate +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +spec: + endpoints: + - honorLabels: true + port: http-metrics + scheme: http + selector: + matchLabels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + 
labels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/managed-by: Kustomize + app.kubernetes.io/name: mimir + app.kubernetes.io/version: 2.11.0 + name: mimir + namespace: monitoring-system +spec: + endpoints: + - port: http-metrics + relabelings: + - replacement: monitoring-system/mimir + sourceLabels: + - job + targetLabel: job + scheme: http + namespaceSelector: + matchNames: + - monitoring-system + selector: + matchExpressions: + - key: prometheus.io/service-monitor + operator: NotIn + values: + - "false" + matchLabels: + app.kubernetes.io/component: mimir + app.kubernetes.io/instance: mimir-monolithic-mode + app.kubernetes.io/name: mimir +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +spec: + rules: + - host: grafana-agent.localhost + http: + paths: + - backend: + service: + name: grafana-agent + port: + number: 80 + path: / + pathType: Prefix + - host: agent.localhost + http: + paths: + - backend: + service: + name: grafana-agent + port: + number: 80 + path: / + pathType: Prefix diff --git a/kubernetes/read-write-mode/logs/kustomization.yaml b/kubernetes/read-write-mode/logs/kustomization.yaml index 06fc3c4f..8f71a8a2 100644 --- a/kubernetes/read-write-mode/logs/kustomization.yaml +++ b/kubernetes/read-write-mode/logs/kustomization.yaml @@ -8,8 +8,13 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: +- ../../common/grafana-agent - loki +# optional +- ../../monolithic-mode/metrics/mimir + + secretGenerator: - name: loki-env namespace: logging-system @@ -19,19 +24,10 @@ secretGenerator: configMapGenerator: - name: agent-config namespace: monitoring-system - options: - disableNameSuffixHash: true + 
behavior: replace files: - configs/config.river -- name: grafana-datasources - namespace: monitoring-system - options: - labels: - grafana_datasource: "1" - files: - - datasources.yaml=configs/grafana-datasources-loki.yaml - - name: loki-config namespace: logging-system behavior: replace diff --git a/kubernetes/read-write-mode/metrics/configs/config.river b/kubernetes/read-write-mode/metrics/configs/config.river index b6bfcd6d..b8070635 100644 --- a/kubernetes/read-write-mode/metrics/configs/config.river +++ b/kubernetes/read-write-mode/metrics/configs/config.river @@ -8,6 +8,9 @@ logging { format = "logfmt" } +/******************************************** + * Grafana LGTMP Stack Receiver Provider + ********************************************/ module.file "lgtmp" { filename = coalesce(env("AGENT_CONFIG_FOLDER"), "/etc/agent-modules") + "/lgtmp.river" diff --git a/kubernetes/read-write-mode/metrics/configs/grafana-datasources-mimir.yaml b/kubernetes/read-write-mode/metrics/configs/grafana-datasources-mimir.yaml deleted file mode 100644 index 7e947c20..00000000 --- a/kubernetes/read-write-mode/metrics/configs/grafana-datasources-mimir.yaml +++ /dev/null @@ -1,17 +0,0 @@ -apiVersion: 1 - -deleteDatasources: -- name: Metrics - uid: metrics - -datasources: -# Mimir for metrics -- name: Metrics - type: prometheus - uid: metrics - access: proxy - url: http://nginx.gateway.svc.cluster.local:8080/prometheus - basicAuth: false - isDefault: true - version: 1 - editable: true diff --git a/kubernetes/read-write-mode/metrics/k8s-all-in-one.yaml b/kubernetes/read-write-mode/metrics/k8s-all-in-one.yaml index 635751cf..18b29580 100644 --- a/kubernetes/read-write-mode/metrics/k8s-all-in-one.yaml +++ b/kubernetes/read-write-mode/metrics/k8s-all-in-one.yaml @@ -1,875 +1,158 @@ apiVersion: v1 -data: - MIMIR_ALERT_MANAGER_HOST: mimir-backend.monitoring-system.svc.cluster.local - MIMIR_COMPACTOR_HOST: mimir-backend.monitoring-system.svc.cluster.local - MIMIR_DISTRIBUTOR_HOST: 
mimir-write.monitoring-system.svc.cluster.local - MIMIR_QUERY_FRONTEND_HOST: mimir-read.monitoring-system.svc.cluster.local - MIMIR_RULER_HOST: mimir-backend.monitoring-system.svc.cluster.local -kind: ConfigMap +kind: ServiceAccount metadata: - name: nginx-env - namespace: gateway + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system --- -apiVersion: v1 -data: - agent-cluster-node.json: |- - { - "annotations": { - "list": [ - { - "datasource": "$loki_datasource", - "enable": true, - "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", - "iconColor": "rgba(0, 211, 255, 1)", - "instant": false, - "name": "Deployments", - "titleFormat": "{{cluster}}/{{namespace}}" - } - ] - }, - "graphTooltip": 1, - "links": [ - { - "icon": "doc", - "targetBlank": true, - "title": "Documentation", - "tooltip": "Clustering documentation", - "type": "link", - "url": "https://grafana.com/docs/agent/latest/flow/reference/cli/run/#clustered-mode-experimental" - }, - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "title": "Node Info", - "type": "row" - }, - { - "datasource": "${datasource}", - "description": "Information about a specific cluster node.\n\n* Lamport clock time: The observed Lamport time on the specific node's clock used to provide partial ordering around gossip messages. Nodes should ideally be observing roughly the same time, meaning they are up-to-date on the cluster state. 
If a node is falling behind, it means that it has not recently processed the same number of messages and may have an outdated view of its peers.\n\n* Internal cluster state observers: The number of Observer functions that are registered to run whenever the node detects a cluster change.\n\n* Gossip health score: A health score assigned to this node by the memberlist implementation. The lower, the better.\n\n* Gossip protocol version: The protocol version used by nodes to communicate with one another. It should match across all nodes.\n", - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 1 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(cluster_node_lamport_time{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "Lamport clock time" - }, - { - "datasource": "${datasource}", - "expr": "sum(cluster_node_update_observers{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "Internal cluster state observers" - }, - { - "datasource": "${datasource}", - "expr": "sum(cluster_node_gossip_health_score{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "Gossip health score" - }, - { - "datasource": "${datasource}", - "expr": "sum(cluster_node_gossip_proto_version{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "Gossip protocol version" - } - ], - "title": "Node Info", - "transformations": [ - { - "id": "renameByRegex", - "options": { - "regex": "Value #(.*)", - "renamePattern": "$1" - } - }, - { - "id": "reduce", - "options": { } - }, - { - "id": "organize", - "options": { 
- "excludeByName": { }, - "indexByName": { }, - "renameByName": { - "Field": "Metric", - "Max": "Value" - } - } - } - ], - "type": "table" - }, - { - "datasource": "${datasource}", - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 1 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(cluster_node_gossip_received_events_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", - "instant": false, - "legendFormat": "{{event}}", - "range": true - } - ], - "title": "Gossip ops/s", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Known peers to the node (including the local node).\n", - "fieldConfig": { - "defaults": { - "unit": "suffix:peers" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 9 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(cluster_node_peers{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", - "instant": false, - "legendFormat": "__auto", - "range": true - } - ], - "title": "Known peers", - "type": "stat" - }, - { - "datasource": "${datasource}", - "description": "Known peers to the node by state (including the local node).\n", - "fieldConfig": { - "defaults": { - "unit": "suffix:nodes" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 9 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "cluster_node_peers{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}", - "instant": false, - "legendFormat": "{{state}}", - "range": true - } - ], - "title": "Peers by state", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 17 - }, - "title": "Gossip Transport", - "type": "row" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "axisCenteredZero": true - }, - "unit": "Bps" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 18 - }, - 
"targets": [ - { - "datasource": "${datasource}", - "expr": "rate(cluster_transport_rx_bytes_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", - "instant": false, - "legendFormat": "rx", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "-1 * rate(cluster_transport_tx_bytes_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", - "instant": false, - "legendFormat": "tx", - "range": true - } - ], - "title": "Transport bandwidth", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "unit": "percentunit" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 18 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "1 - (\nrate(cluster_transport_tx_packets_failed_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) /\nrate(cluster_transport_tx_packets_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n)\n", - "instant": false, - "legendFormat": "Tx success %", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "1 - (\n rate(cluster_transport_rx_packets_failed_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) /\n rate(cluster_transport_rx_packets_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n )\n", - "instant": false, - "legendFormat": "Rx success %", - "range": true - } - ], - "title": "Packet write success rate", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "The number of packets enqueued currently to be decoded or encoded and sent during communication with other nodes.\n\nThe incoming and outgoing packet queue should be as empty as possible; a growing queue means that the Agent cannot keep up with the number of messages required to have all nodes 
informed of cluster changes, and the nodes may not converge in a timely fashion.\n", - "fieldConfig": { - "defaults": { - "unit": "pkts" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 18 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "cluster_transport_tx_packet_queue_length{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}", - "instant": false, - "legendFormat": "tx queue", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "cluster_transport_rx_packet_queue_length{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}", - "instant": false, - "legendFormat": "rx queue", - "range": true - } - ], - "title": "Pending packet queue", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "axisCenteredZero": true - }, - "unit": "Bps" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 26 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(cluster_transport_stream_rx_bytes_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", - "instant": false, - "legendFormat": "rx", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "-1 * rate(cluster_transport_stream_tx_bytes_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", - "instant": false, - "legendFormat": "tx", - "range": true - } - ], - "title": "Stream bandwidth", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "unit": "percentunit" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 26 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "1 - (\n rate(cluster_transport_stream_tx_packets_failed_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) /\n 
rate(cluster_transport_stream_tx_packets_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n )\n", - "instant": false, - "legendFormat": "Tx success %", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "1 - (\n rate(cluster_transport_stream_rx_packets_failed_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) /\n rate(cluster_transport_stream_rx_packets_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n )\n", - "instant": false, - "legendFormat": "Rx success %", - "range": true - } - ], - "title": "Stream write success rate", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "The number of open connections from this node to its peers.\n\nEach node picks up a subset of its peers to continuously gossip messages around cluster status using streaming HTTP/2 connections. This panel can be used to detect networking failures that result in cluster communication being disrupted and convergence taking longer than expected or outright failing.\n", - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 26 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "cluster_transport_streams{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}", - "instant": false, - "legendFormat": "Open streams", - "range": true - } - ], - "title": "Open transport streams", - "type": "timeseries" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": "cluster", - 
"query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": "namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "instance", - "name": "instance", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n", - "refId": "instance" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - "timezone": "utc", - "title": "Grafana Agent Flow / Cluster Node", - "uid": "dd370cd333b2d9258435fb1b5a20a89b" - } -kind: ConfigMap +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin labels: - grafana_dashboard: "1" - name: agent-cluster-node.json + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent +rules: +- apiGroups: + - "" + - discovery.k8s.io + - networking.k8s.io + resources: + - endpoints + - endpointslices + - ingresses + - nodes + - nodes/proxy + - nodes/metrics + - pods + - services + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - pods + - pods/log + - namespaces + verbs: + - get + - list + - watch +- apiGroups: + - 
monitoring.grafana.com + resources: + - podlogs + verbs: + - get + - list + - watch +- apiGroups: + - monitoring.coreos.com + resources: + - prometheusrules + verbs: + - get + - list + - watch +- nonResourceURLs: + - /metrics + verbs: + - get +- apiGroups: + - monitoring.coreos.com + resources: + - podmonitors + - servicemonitors + - probes + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - events + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - configmaps + - secrets + verbs: + - get + - list + - watch +- apiGroups: + - apps + resources: + - replicasets + verbs: + - get + - list + - watch +- apiGroups: + - extensions + resources: + - replicasets + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: grafana-agent +subjects: +- kind: ServiceAccount + name: grafana-agent namespace: monitoring-system --- apiVersion: v1 data: - agent-cluster-overview.json: |- - { - "annotations": { - "list": [ - { - "datasource": "$loki_datasource", - "enable": true, - "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", - "iconColor": "rgba(0, 211, 255, 1)", - "instant": false, - "name": "Deployments", - "titleFormat": "{{cluster}}/{{namespace}}" - } - ] - }, - "graphTooltip": 1, - "links": [ - { - "icon": "doc", - "targetBlank": true, - "title": "Documentation", - "tooltip": "Clustering documentation", - "type": "link", - "url": "https://grafana.com/docs/agent/latest/flow/reference/cli/run/#clustered-mode-experimental" - }, - { - "asDropdown": true, - "icon": "external link", - 
"includeVars": true, - "keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "${datasource}", - "gridPos": { - "h": 9, - "w": 8, - "x": 0, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "count(cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"})", - "instant": true, - "legendFormat": "__auto", - "range": false - } - ], - "title": "Nodes", - "type": "stat" - }, - { - "datasource": "${datasource}", - "description": "Nodes info.\n", - "fieldConfig": { - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Dashboard" - }, - "properties": [ - { - "id": "mappings", - "value": [ - { - "options": { - "1": { - "index": 0, - "text": "Link" - } - }, - "type": "value" - } - ] - }, - { - "id": "links", - "value": [ - { - "targetBlank": false, - "title": "Detail dashboard for node", - "url": "/d/dd370cd333b2d9258435fb1b5a20a89b/grafana-agent-flow-cluster-node?var-instance=${__data.fields.instance}&var-datasource=${datasource}&var-loki_datasource=${loki_datasource}&var-cluster=${cluster}&var-namespace=${namespace}" - } - ] - } - ] - } - ] - }, - "gridPos": { - "h": 9, - "w": 16, - "x": 8, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"}", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false - } - ], - "title": "Node table", - "transformations": [ - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true, - "Value": false, - "__name__": true, - "cluster": true, - "namespace": true, - "state": false - }, - "indexByName": { }, - "renameByName": { - "Value": "Dashboard", - "instance": "", - "state": "" - } - } - } - ], - "type": "table" - }, - { - "datasource": "${datasource}", - "description": "Whether the cluster state has converged.\n\nIt is normal for the 
cluster state to be diverged briefly as gossip events propagate. It is not normal for the cluster state to be diverged for a long period of time.\n\nThis will show one of the following:\n\n* Converged: Nodes are aware of all other nodes, with the correct states.\n* Not converged: A subset of nodes aren't aware of their peers, or don't have an updated view of peer states.\n", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "1": { - "color": "red", - "index": 1, - "text": "Not converged" - } - }, - "type": "value" - }, - { - "options": { - "match": "null", - "result": { - "color": "green", - "index": 0, - "text": "Converged" - } - }, - "type": "special" - } - ], - "unit": "suffix:nodes" - } - }, - "gridPos": { - "h": 9, - "w": 8, - "x": 0, - "y": 9 - }, - "options": { - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "clamp((\n sum(stddev by (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"}) != 0) or\n (sum(abs(sum without (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"})) - scalar(count(cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"})) != 0))\n ),\n 1, 1\n)\n", - "format": "time_series", - "instant": true, - "legendFormat": "__auto", - "range": false - } - ], - "title": "Convergance state", - "type": "stat" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 80, - "spanNulls": true - }, - "mappings": [ - { - "options": { - "0": { - "color": "green", - "text": "Yes" - } - }, - "type": "value" - }, - { - "options": { - "1": { - "color": "red", - "text": "No" - } - }, - "type": "value" - } - ], - "max": 1, - "noValue": 0 - } - }, - "gridPos": { - "h": 9, - "w": 16, - "x": 8, - "y": 9 - }, - 
"options": { - "mergeValues": true - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "ceil(clamp((\n sum(stddev by (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"})) or\n (sum(abs(sum without (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"})) - scalar(count(cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"}))))\n ),\n 0, 1\n))\n", - "instant": false, - "legendFormat": "Converged", - "range": true - } - ], - "title": "Convergance state timeline", - "type": "state-timeline" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": "cluster", - "query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": "namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - "timezone": "utc", - "title": "Grafana Agent Flow / Cluster Overview", - "uid": "7e07f9c975fcfc2a6e120a95f579f843" - } + MIMIR_ALERT_MANAGER_HOST: 
mimir-backend.monitoring-system.svc.cluster.local + MIMIR_COMPACTOR_HOST: mimir-backend.monitoring-system.svc.cluster.local + MIMIR_DISTRIBUTOR_HOST: mimir-write.monitoring-system.svc.cluster.local + MIMIR_QUERY_FRONTEND_HOST: mimir-read.monitoring-system.svc.cluster.local + MIMIR_RULER_HOST: mimir-backend.monitoring-system.svc.cluster.local kind: ConfigMap metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin - labels: - grafana_dashboard: "1" - name: agent-cluster-overview.json - namespace: monitoring-system + name: nginx-env + namespace: gateway --- apiVersion: v1 data: config.river: "/*\nThe following example shows using the default all logs processing module, for\na single tenant and specifying the destination url/credentials via environment\nvariables.\n*/\nlogging {\n\tlevel = coalesce(env(\"AGENT_LOG_LEVEL\"), - \"info\")\n\tformat = \"logfmt\"\n}\n\nmodule.file \"lgtmp\" {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), - \"/etc/agent-modules\") + \"/lgtmp.river\"\n\n\targuments {\n\t\tcluster = - coalesce(env(\"CLUSTER\"), \"k3d-k3s-codelab\")\n\t\tlogs_endpoint = coalesce(env(\"LOGS_ENDPOINT\"), + \"info\")\n\tformat = \"logfmt\"\n}\n\n/********************************************\n + * Grafana LGTMP Stack Receiver Provider\n ********************************************/\nmodule.file + \"lgtmp\" {\n\tfilename = coalesce(env(\"AGENT_CONFIG_FOLDER\"), \"/etc/agent-modules\") + + \"/lgtmp.river\"\n\n\targuments {\n\t\tcluster = coalesce(env(\"CLUSTER\"), + \"k3d-k3s-codelab\")\n\t\tlogs_endpoint = coalesce(env(\"LOGS_ENDPOINT\"), \"http://nginx.gateway.svc:3100\")\n\t\tmetrics_endpoint = coalesce(env(\"METRICS_ENDPOINT\"), \"http://nginx.gateway.svc:8080\")\n\t\tprofiles_endpoint = coalesce(env(\"PROFILES_ENDPOINT\"), \"http://nginx.gateway.svc:4040\")\n\t\ttraces_endpoint = coalesce(env(\"TRACES_ENDPOINT\"), @@ -884,9320 +167,485 @@ data: \ = \"monitoring-system\"\n\t\tforward_to = 
[module.file.lgtmp.exports.metrics_receiver]\n\t}\n}\n" kind: ConfigMap metadata: - name: agent-config + name: agent-config-9cc7gk9k2b namespace: monitoring-system --- apiVersion: v1 data: - agent-flow-controller.json: |- - { - "annotations": { - "list": [ - { - "datasource": "$loki_datasource", - "enable": true, - "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", - "iconColor": "rgba(0, 211, 255, 1)", - "instant": false, - "name": "Deployments", - "titleFormat": "{{cluster}}/{{namespace}}" - } - ] - }, - "graphTooltip": 1, - "links": [ - { - "icon": "doc", - "targetBlank": true, - "title": "Documentation", - "tooltip": "Component controller documentation", - "type": "link", - "url": "https://grafana.com/docs/agent/latest/flow/concepts/component_controller/" - }, - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "${datasource}", - "description": "The number of Grafana Agent Flow instances whose metrics are being sent and reported.\n", - "fieldConfig": { - "defaults": { - "unit": "agents" - } - }, - "gridPos": { - "h": 4, - "w": 10, - "x": 0, - "y": 0 - }, - "options": { - "colorMode": "none", - "graphMode": "none" - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "count(agent_component_controller_evaluating{cluster=\"$cluster\", namespace=\"$namespace\"})", - "instant": false, - "legendFormat": "__auto", - "range": true - } - ], - "title": "Running agents", - "type": "stat" - }, - { - "datasource": "${datasource}", - "description": "The number of running components across all running agents.\n", - "fieldConfig": { - "defaults": { - "unit": "components" - } - }, - "gridPos": { - "h": 4, - "w": 10, - "x": 0, - "y": 4 - }, - "options": { - 
"colorMode": "none", - "graphMode": "none" - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"})", - "instant": false, - "legendFormat": "__auto", - "range": true - } - ], - "title": "Running components", - "type": "stat" - }, - { - "datasource": "${datasource}", - "description": "The percentage of components which are in a healthy state.\n", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "No components", - "unit": "percentunit" - } - }, - "gridPos": { - "h": 4, - "w": 10, - "x": 0, - "y": 8 - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "text": { - "valueSize": 80 - } - }, - "pluginVersion": "9.0.6", - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\",health_type=\"healthy\"}) /\nsum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"})\n", - "instant": false, - "legendFormat": "__auto", - "range": true - } - ], - "title": "Overall component health", - "type": "stat" - }, - { - "datasource": "${datasource}", - "description": "Breakdown of components by health across all running agents.\n\n* Healthy: components have been evaluated completely and are reporting themselves as healthy.\n* Unhealthy: Components either could not be evaluated or are reporting themselves as unhealthy.\n* Unknown: A component has been created but has not yet been started.\n* Exited: A component has exited. It will not return to the running state.\n\nMore information on a component's health state can be retrieved using\nthe Grafana Agent Flow UI.\n\nNote that components may be in a degraded state even if they report\nthemselves as healthy. 
Use component-specific dashboards and alerts\nto observe detailed information about the behavior of a component.\n", - "fieldConfig": { - "defaults": { - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Unhealthy" - }, - "properties": [ - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 1 - } - ] - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Unknown" - }, - "properties": [ - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "blue", - "value": 1 - } - ] - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Exited" - }, - "properties": [ - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 1 - } - ] - } - } - ] - } - ] - }, - "gridPos": { - "h": 12, - "w": 14, - "x": 10, - "y": 0 - }, - "options": { - "orientation": "vertical", - "showUnfilled": true - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"healthy\"}) or vector(0)", - "instant": true, - "legendFormat": "Healthy", - "range": false - }, - { - "datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"unhealthy\"}) or vector(0)", - "instant": true, - "legendFormat": "Unhealthy", - "range": false - }, - { - "datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"unknown\"}) or vector(0)", - "instant": true, - "legendFormat": "Unknown", - 
"range": false - }, - { - "datasource": "${datasource}", - "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"exited\"}) or vector(0)", - "instant": true, - "legendFormat": "Exited", - "range": false - } - ], - "title": "Components by health", - "type": "bargauge" - }, - { - "datasource": "${datasource}", - "description": "The frequency at which components get updated.\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "points", - "pointSize": 3 - }, - "unit": "ops" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 12 - }, - "options": { - "tooltip": { - "mode": "multi" - } - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (instance) (rate(agent_component_evaluation_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", - "instant": false, - "legendFormat": "__auto", - "range": true - } - ], - "title": "Component evaluation rate", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "The percentiles for how long it takes to complete component evaluations.\n\nComponent evaluations must complete for components to have the latest\narguments. 
The longer the evaluations take, the slower it will be to\nreconcile the state of components.\n\nIf evaluation is taking too long, consider sharding your components to\ndeal with smaller amounts of data and reuse data as much as possible.\n", - "fieldConfig": { - "defaults": { - "unit": "s" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "histogram_quantile(0.99, sum(rate(agent_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\nor\nhistogram_quantile(0.99, sum by (le) (rate(agent_component_evaluation_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\n", - "instant": false, - "legendFormat": "99th percentile", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "histogram_quantile(0.50, sum(rate(agent_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\nor\nhistogram_quantile(0.50, sum by (le) (rate(agent_component_evaluation_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\n", - "instant": false, - "legendFormat": "50th percentile", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "(\n histogram_sum(sum(rate(agent_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))) /\n histogram_count(sum(rate(agent_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\n)\nor\n(\n sum(rate(agent_component_evaluation_seconds_sum{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])) /\n sum(rate(agent_component_evaluation_seconds_count{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n)\n", - "instant": false, - "legendFormat": "Average", - "range": true - } - ], - "title": "Component evaluation time", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "The 
percentage of time spent evaluating 'slow' components - components that took longer than 1 minute to evaluate.\n\nIdeally, no component should take more than 1 minute to evaluate. The components displayed in this chart\nmay be a sign of a problem with the pipeline.\n", - "fieldConfig": { - "defaults": { - "unit": "percentunit" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (component_id) (rate(agent_component_evaluation_slow_seconds{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n/ scalar(sum(rate(agent_component_evaluation_seconds_sum{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))\n", - "instant": false, - "legendFormat": "{{component_id}}", - "range": true - } - ], - "title": "Slow components evaluation times", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Detailed histogram view of how long component evaluations take.\n\nThe goal is to design your config so that evaluations take as little\ntime as possible; under 100ms is a good goal.\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 22 - }, - "maxDataPoints": 30, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "scheme": "Spectral" - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 0.10000000000000001 - }, - "tooltip": { - "show": true, - "yHistogram": true - }, - "yAxis": { - "unit": "s" - } - }, - "pluginVersion": "9.0.6", - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(increase(agent_component_evaluation_seconds{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\nor ignoring (le)\nsum by (le) (increase(agent_component_evaluation_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n", - "format": "heatmap", - "instant": false, - "legendFormat": "{{le}}", - "range": true - } - ], - "title": "Component evaluation 
histogram", - "type": "heatmap" - }, - { - "datasource": "${datasource}", - "description": "Detailed histogram of how long components wait to be evaluated after their dependency is updated.\n\nThe goal is to design your config so that most of the time components do not\nqueue for long; under 10ms is a good goal.\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 22 - }, - "maxDataPoints": 30, - "options": { - "calculate": false, - "cellGap": 0, - "color": { - "scheme": "Spectral" - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 0.10000000000000001 - }, - "tooltip": { - "show": true, - "yHistogram": true - }, - "yAxis": { - "unit": "s" - } - }, - "pluginVersion": "9.0.6", - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(increase(agent_component_dependencies_wait_seconds{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\nor ignoring (le)\nsum by (le) (increase(agent_component_dependencies_wait_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n", - "format": "heatmap", - "instant": false, - "legendFormat": "{{le}}", - "range": true - } - ], - "title": "Component dependency wait histogram", - "type": "heatmap" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": "cluster", - "query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - 
"query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": "namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - "timezone": "utc", - "title": "Grafana Agent Flow / Controller", - "uid": "f861e5fef2e795edd5c4c73bee1ba769" - } + MEMCACHED_SECRET_NAME: integrations-memcached + MYSQL_SECRET_NAME: integrations-mysql + REDIS_SECRET_NAME: integrations-redis + memcached.river: "/*\nModule: Memcached integrations\nDescription: Wrapper module + to integration Memcached metrics\n*/\nargument \"clustering\" {\n\t// comment + = \"Whether or not clustering should be enabled\"\n\toptional = true\n\tdefault + \ = true\n}\n\nargument \"name\" {\n\t// comment = \"Name of the secret for Memcached\"\n\toptional + = true\n\tdefault = \"integrations-memcached\"\n}\n\nargument \"namespace\" {\n\t// + comment = \"Namespace of the Memcached secret Integrations\"\n\toptional = true\n\tdefault + \ = \"default\"\n}\n\nargument \"instance\" {\n\t// comment = \"Instance of the + Memcached\"\n\toptional = true\n\tdefault = \"primary\"\n}\n\nargument \"forward_to\" + { }\n\nremote.kubernetes.secret \"memcached\" {\n\tname = argument.name.value\n\tnamespace + = argument.namespace.value\n}\n\n// Metrics\nprometheus.exporter.memcached \"integrations_memcached\" + {\n\taddress = nonsensitive(remote.kubernetes.secret.memcached.data[\"memcached-address\"])\n\ttimeout + = \"5s\"\n}\n\nprometheus.scrape \"memcached\" {\n\tclustering {\n\t\tenabled + = argument.clustering.value\n\t}\n\n\tenable_protobuf_negotiation = true\n\tscrape_classic_histograms + \ = true\n\n\ttargets = 
concat(\n\t\tprometheus.exporter.memcached.integrations_memcached.targets,\n\t)\n\tjob_name + \ = \"integrations/memcached\"\n\tforward_to = [prometheus.relabel.integrations_memcached.receiver]\n}\n\nprometheus.relabel + \"integrations_memcached\" {\n\trule {\n\t\treplacement = argument.instance.value\n\t\ttarget_label + = \"instance\"\n\t}\n\tforward_to = argument.forward_to.value\n}\n" + mysql.river: "/*\nModule: Mysql integrations\nDescription: Wrapper module to integration + mysql metrics\n*/\nargument \"clustering\" {\n\t// comment = \"Whether or not + clustering should be enabled\"\n\toptional = true\n\tdefault = true\n}\n\nargument + \"name\" {\n\t// comment = \"Name of the secret for MySQL\"\n\toptional = true\n\tdefault + \ = \"integrations-mysql\"\n}\n\nargument \"namespace\" {\n\t// comment = \"Namespace + of the MySQL secret Integrations\"\n\toptional = true\n\tdefault = \"default\"\n}\n\nargument + \"instance\" {\n\t// comment = \"Instance of the Database\"\n\toptional = true\n\tdefault + \ = \"primary\"\n}\n\nargument \"forward_to\" { }\n\nremote.kubernetes.secret + \"mysql\" {\n\tname = argument.name.value\n\tnamespace = argument.namespace.value\n}\n\n// + Metrics\nprometheus.exporter.mysql \"integrations_mysql\" {\n\tdata_source_name + = nonsensitive(remote.kubernetes.secret.mysql.data[\"mysql-username\"]) + \":\" + + nonsensitive(remote.kubernetes.secret.mysql.data[\"mysql-password\"]) + \"@(\" + + nonsensitive(remote.kubernetes.secret.mysql.data[\"mysql-host\"]) + \")/\"\n}\n\nprometheus.scrape + \"mysql\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\tenable_protobuf_negotiation + = true\n\tscrape_classic_histograms = true\n\n\ttargets = concat(\n\t\tprometheus.exporter.mysql.integrations_mysql.targets,\n\t)\n\tjob_name + \ = \"integrations/mysql\"\n\tforward_to = [prometheus.relabel.integrations_mysql.receiver]\n}\n\nprometheus.relabel + \"integrations_mysql\" {\n\trule {\n\t\treplacement = 
argument.instance.value\n\t\ttarget_label + = \"instance\"\n\t}\n\tforward_to = argument.forward_to.value\n}\n" + redis.river: "/*\nModule: Redis integrations\nDescription: Wrapper module to integration + Redis metrics\n*/\nargument \"clustering\" {\n\t// comment = \"Whether or not + clustering should be enabled\"\n\toptional = true\n\tdefault = true\n}\n\nargument + \"namespace\" {\n\t// comment = \"Namespace of the Redis secret Integrations\"\n\toptional + = true\n\tdefault = \"default\"\n}\n\nargument \"name\" {\n\t// comment = \"Name + of the secret for Redis\"\n\toptional = true\n\tdefault = \"integrations-redis\"\n}\n\nargument + \"instance\" {\n\t// comment = \"Instance of the Redis\"\n\toptional = true\n\tdefault + \ = \"master\"\n}\n\nargument \"forward_to\" { }\n\nremote.kubernetes.secret \"redis\" + {\n\tname = argument.name.value\n\tnamespace = argument.namespace.value\n}\n\n// + Metrics\nprometheus.exporter.redis \"integrations_redis\" {\n\tredis_addr = + nonsensitive(remote.kubernetes.secret.redis.data[\"redis-addr\"])\n\tredis_password + = nonsensitive(remote.kubernetes.secret.redis.data[\"redis-password\"])\n}\n\nprometheus.scrape + \"redis\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\tenable_protobuf_negotiation + = true\n\tscrape_classic_histograms = true\n\n\ttargets = concat(\n\t\tprometheus.exporter.redis.integrations_redis.targets,\n\t)\n\tjob_name + \ = \"integrations/redis\"\n\tforward_to = [prometheus.relabel.integrations_redis.receiver]\n}\n\nprometheus.relabel + \"integrations_redis\" {\n\trule {\n\t\treplacement = argument.instance.value\n\t\ttarget_label + = \"instance\"\n\t}\n\tforward_to = argument.forward_to.value\n}\n" kind: ConfigMap metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin - labels: - grafana_dashboard: "1" - name: agent-flow-controller.json + name: agent-integrations namespace: monitoring-system --- apiVersion: v1 data: - agent-flow-opentelemetry.json: |- - { - 
"graphTooltip": 1, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "title": "Receivers for traces [otelcol.receiver]", - "type": "row" - }, - { - "datasource": "${datasource}", - "description": "Number of spans successfully pushed into the pipeline.\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(receiver_accepted_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{ pod }} / {{ transport }}", - "range": true - } - ], - "title": "Accepted spans", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Number of spans that could not be pushed into the pipeline.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 20, - "gradientMode": "hue", - "stacking": { - "mode": "normal" - } - } - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(receiver_refused_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{ pod }} / {{ transport }}", - "range": true - } - ], - "title": "Refused spans", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "The duration of inbound RPCs.\n", - "fieldConfig": { - "defaults": { - "unit": "milliseconds" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 0 - }, - "maxDataPoints": 30, - "options": { - "calculate": false, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - 
"scale": "exponential", - "scheme": "Oranges", - "steps": 65 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1.0000000000000001e-09 - }, - "tooltip": { - "show": true, - "yHistogram": true - }, - "yAxis": { - "unit": "s" - } - }, - "pluginVersion": "9.0.6", - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (le) (increase(rpc_server_duration_milliseconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", rpc_service=\"opentelemetry.proto.collector.trace.v1.TraceService\"}[$__rate_interval]))", - "format": "heatmap", - "instant": false, - "legendFormat": "{{le}}", - "range": true - } - ], - "title": "RPC server duration (traces)", - "type": "heatmap" - }, - { - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 10 - }, - "title": "Batching [otelcol.processor.batch]", - "type": "row" - }, - { - "datasource": "${datasource}", - "description": "Number of units in the batch\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 10 - }, - "maxDataPoints": 30, - "options": { - "calculate": false, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "scale": "exponential", - "scheme": "Oranges", - "steps": 65 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1.0000000000000001e-09 - }, - "tooltip": { - "show": true, - "yHistogram": true - }, - "yAxis": { - "unit": "s" - } - }, - "pluginVersion": "9.0.6", - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (le) (increase(processor_batch_batch_send_size_ratio_bucket{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval]))", - "format": "heatmap", - "instant": false, - "legendFormat": "{{le}}", - "range": true - } - ], - "title": "Number of units in the batch", - "type": "heatmap" - }, - { - "datasource": "${datasource}", - "description": "Number of distinct metadata value combinations 
being processed\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 10 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "processor_batch_metadata_cardinality_ratio{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}\n", - "instant": false, - "legendFormat": "{{ pod }}", - "range": true - } - ], - "title": "Distinct metadata values", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Number of times the batch was sent due to a timeout trigger\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 10 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(processor_batch_timeout_trigger_send_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{ pod }}", - "range": true - } - ], - "title": "Timeout trigger", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 20 - }, - "title": "Exporters for traces [otelcol.exporter]", - "type": "row" - }, - { - "datasource": "${datasource}", - "description": "Number of spans successfully sent to destination.\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 20 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(exporter_sent_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{ pod }}", - "range": true - } - ], - "title": "Exported sent spans", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Number of spans in failed attempts to send to destination.\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 20 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(exporter_send_failed_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", - 
"instant": false, - "legendFormat": "{{ pod }}", - "range": true - } - ], - "title": "Exported failed spans", - "type": "timeseries" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": "cluster", - "query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": "namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "allValue": ".*", - "datasource": "${datasource}", - "includeAll": true, - "label": "instance", - "multi": true, - "name": "instance", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n", - "refId": "instance" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - "timezone": "utc", - "title": "Grafana Agent Flow / OpenTelemetry", - "uid": "c90e752eb8c0fce588f906b7279aceea" - } + integrations.river: "/*\nModule: Agent integrations\nDescription: Wrapper module + to include auto loading 
integrations\n*/\nargument \"name\" {\n\t// comment = + \"Name of the integrations config\"\n\toptional = true\n\tdefault = \"agent-integrations\"\n}\n\nargument + \"namespace\" {\n\t// comment = \"Namespace of the integrations config\"\n\toptional + = true\n\tdefault = \"default\"\n}\n\nargument \"forward_to\" { }\n\nremote.kubernetes.configmap + \"integrations\" {\n\tname = argument.name.value\n\tnamespace = argument.namespace.value\n}\n\n/********************************************\n + * Integrations Mysql\n ********************************************/\nmodule.string + \"mysql\" {\n\tcontent = remote.kubernetes.configmap.integrations.data[\"mysql.river\"]\n\n\targuments + {\n\t\tnamespace = argument.namespace.value\n\t\tname = remote.kubernetes.configmap.integrations.data[\"MYSQL_SECRET_NAME\"]\n\t\tinstance + \ = \"primary\"\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n\n/********************************************\n + * Integrations Memcached\n ********************************************/\nmodule.string + \"memcached\" {\n\tcontent = remote.kubernetes.configmap.integrations.data[\"memcached.river\"]\n\n\targuments + {\n\t\tnamespace = argument.namespace.value\n\t\tname = remote.kubernetes.configmap.integrations.data[\"MEMCACHED_SECRET_NAME\"]\n\t\tinstance + \ = \"primary\"\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n\n/********************************************\n + * Integrations Redis\n ********************************************/\nmodule.string + \"redis\" {\n\tcontent = remote.kubernetes.configmap.integrations.data[\"redis.river\"]\n\n\targuments + {\n\t\tnamespace = argument.namespace.value\n\t\tname = remote.kubernetes.configmap.integrations.data[\"REDIS_SECRET_NAME\"]\n\t\tinstance + \ = \"master\"\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n" + lgtmp.river: "/********************************************\n * ARGUMENTS\n ********************************************/\nargument + \"cluster\" {\n\toptional = true\n\tdefault 
= \"k3d-k3s-codelab\"\n}\n\nargument + \"tenant\" {\n\toptional = true\n\tdefault = \"anonymous\"\n}\n\nargument \"metrics_endpoint\" + {\n\toptional = true\n\tdefault = \"http://mimir:8080\"\n\t//comment = \"Where + to send collected metrics.\"\n}\n\nargument \"logs_endpoint\" {\n\toptional = + true\n\tdefault = \"http://loki:3100\"\n\t//comment = \"Where to send collected + logs.\"\n}\n\nargument \"traces_endpoint\" {\n\toptional = true\n\tdefault = + \"tempo:4317\"\n\t//comment = \"Where to send collected traces.\"\n}\n\nargument + \"profiles_endpoint\" {\n\toptional = true\n\tdefault = \"http://pyroscope:4040\"\n\t//comment + \ = \"Where to send collected profiles.\"\n}\n\n/********************************************\n + * EXPORTS\n ********************************************/\n\nexport \"metrics_receiver\" + {\n\tvalue = prometheus.remote_write.mimir.receiver\n}\n\nexport \"logs_receiver\" + {\n\tvalue = loki.write.loki.receiver\n}\n\nexport \"traces_receiver\" {\n\tvalue + = otelcol.exporter.otlp.tempo.input\n}\n\nexport \"profiles_receiver\" {\n\tvalue + = pyroscope.write.pyroscope.receiver\n}\n\n/********************************************\n + * Endpoints\n ********************************************/\n\n// Metrics\nprometheus.remote_write + \"mimir\" {\n\tendpoint {\n\t\turl = argument.metrics_endpoint.value + + \"/api/v1/push\"\n\t\tsend_native_histograms = true\n\t}\n\n\texternal_labels + = {\n\t\t\"scraped_by\" = \"grafana-agent\",\n\t\t\"cluster\" = argument.cluster.value,\n\t}\n}\n\n// + Logs\nloki.write \"loki\" {\n\tendpoint {\n\t\turl = argument.logs_endpoint.value + + \"/loki/api/v1/push\"\n\t\ttenant_id = argument.tenant.value\n\t}\n\n\texternal_labels + = {\n\t\t\"scraped_by\" = \"grafana-agent\",\n\t\t\"cluster\" = argument.cluster.value,\n\t}\n}\n\n// + Traces\notelcol.exporter.otlp \"tempo\" {\n\tclient {\n\t\tendpoint = argument.traces_endpoint.value\n\n\t\ttls + {\n\t\t\tinsecure = true\n\t\t\tinsecure_skip_verify = 
true\n\t\t}\n\t}\n}\n\n// + Profiles\npyroscope.write \"pyroscope\" {\n\tendpoint {\n\t\turl = argument.profiles_endpoint.value\n\t}\n\n\texternal_labels + = {\n\t\t\"scraped_by\" = \"grafana-agent\",\n\t\t\"cluster\" = argument.cluster.value,\n\t}\n}\n" + logs.river: "/*\nModule: logs\nDescription: Wrapper module to include all kubernetes + logging modules and use cri parsing\n*/\nargument \"forward_to\" {\n\t// comment + = \"Must be a list(LogsReceiver) where collected logs should be forwarded to\"\n\toptional + = false\n}\n\nargument \"tenant\" {\n\t// comment = \"The tenant to filter logs + to. This does not have to be the tenantId, this is the value to look for in the + logs.agent.grafana.com/tenant annotation, and this can be a regex.\"\n\toptional + = true\n\tdefault = \".*\"\n}\n\nargument \"keep_labels\" {\n\t// comment = \"List + of labels to keep before the log message is written to Loki\"\n\toptional = true\n\tdefault + \ = [\n\t\t\"app\",\n\t\t\"cluster\",\n\t\t\"component\",\n\t\t\"container\",\n\t\t\"deployment\",\n\t\t\"env\",\n\t\t\"filename\",\n\t\t\"instance\",\n\t\t\"job\",\n\t\t\"level\",\n\t\t\"log_type\",\n\t\t\"namespace\",\n\t\t\"region\",\n\t\t\"service\",\n\t\t\"squad\",\n\t\t\"team\",\n\t]\n}\n\nargument + \"git_repo\" {\n\toptional = true\n\tdefault = coalesce(env(\"GIT_REPO\"), \"https://github.com/grafana/agent-modules.git\")\n}\n\nargument + \"git_rev\" {\n\toptional = true\n\tdefault = coalesce(env(\"GIT_REV\"), env(\"GIT_REVISION\"), + env(\"GIT_BRANCH\"), \"main\")\n}\n\nargument \"git_pull_freq\" {\n\t// comment + = \"How often to pull the git repo, the default is 0s which means never pull\"\n\toptional + = true\n\tdefault = \"0s\"\n}\n\nmodule.git \"log_targets\" {\n\trepository = + argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/targets/logs-from-worker.river\"\n\n\targuments + {\n\t\tforward_to = 
[module.git.log_formats_all.exports.process.receiver]\n\t\ttenant + \ = argument.tenant.value\n\t\tgit_repo = argument.git_repo.value\n\t\tgit_rev + \ = argument.git_rev.value\n\t\tgit_pull_freq = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git + \"log_formats_all\" {\n\trepository = argument.git_repo.value\n\trevision + \ = argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/log-formats/all.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.log_level_default.exports.process.receiver]\n\t\tgit_repo + \ = argument.git_repo.value\n\t\tgit_rev = argument.git_rev.value\n\t\tgit_pull_freq + = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git \"log_level_default\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/labels/log-level.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.label_normalize_filename.exports.process.receiver]\n\t}\n}\n\nmodule.git + \"label_normalize_filename\" {\n\trepository = argument.git_repo.value\n\trevision + \ = argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/labels/normalize-filename.river\"\n\n\targuments + {\n\t\t// here we fork, one branch goes to the log level module, the other goes + to the metrics module\n\t\t// this is because we need to reduce the labels on + the pre-metrics but they are still necessary in\n\t\t// downstream modules\n\t\tforward_to + = [\n\t\t\tmodule.git.pre_process_metrics.exports.process.receiver,\n\t\t\tmodule.git.drop_levels.exports.process.receiver,\n\t\t]\n\t}\n}\n\nmodule.git + \"pre_process_metrics\" {\n\trepository = argument.git_repo.value\n\trevision + \ = argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/metrics/pre-process-bytes-lines.river\"\n\n\targuments + {\n\t\tforward_to = 
[module.git.drop_levels.exports.process.receiver]\n\t\tkeep_labels + = argument.keep_labels.value\n\t}\n}\n\nmodule.git \"drop_levels\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/drops/levels.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.scrub_all.exports.process.receiver]\n\t\tgit_repo + \ = argument.git_repo.value\n\t\tgit_rev = argument.git_rev.value\n\t\tgit_pull_freq + = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git \"scrub_all\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/scrubs/all.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.embed_pod.exports.process.receiver]\n\t\tgit_repo + \ = argument.git_repo.value\n\t\tgit_rev = argument.git_rev.value\n\t\tgit_pull_freq + = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git \"embed_pod\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/embed/pod.river\"\n\n\targuments + {\n\t\tforward_to = [module.git.mask_all.exports.process.receiver]\n\t}\n}\n\nmodule.git + \"mask_all\" {\n\trepository = argument.git_repo.value\n\trevision = + argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/masks/all.river\"\n\n\targuments {\n\t\tforward_to + \ = [module.git.label_keep.exports.process.receiver]\n\t\tgit_repo = argument.git_repo.value\n\t\tgit_rev + \ = argument.git_rev.value\n\t\tgit_pull_freq = argument.git_pull_freq.value\n\t}\n}\n\nmodule.git + \"label_keep\" {\n\trepository = argument.git_repo.value\n\trevision = + argument.git_rev.value\n\tpull_frequency = argument.git_pull_freq.value\n\tpath + \ = \"modules/kubernetes/logs/labels/keep-labels.river\"\n\n\targuments + 
{\n\t\tforward_to = [module.git.post_process_metrics.exports.process.receiver]\n\t\tkeep_labels + = argument.keep_labels.value\n\t}\n}\n\nmodule.git \"post_process_metrics\" {\n\trepository + \ = argument.git_repo.value\n\trevision = argument.git_rev.value\n\tpull_frequency + = argument.git_pull_freq.value\n\tpath = \"modules/kubernetes/logs/metrics/post-process-bytes-lines.river\"\n\n\targuments + {\n\t\tforward_to = argument.forward_to.value\n\t}\n}\n" + metrics.river: "/*\nModule: metrics-all\nDescription: Wrapper module to include + all kubernetes metric modules and use cri parsing\n*/\nargument \"forward_to\" + {\n\t// comment = \"Must be a list(MetricssReceiver) where collected logs should + be forwarded to\"\n\toptional = false\n}\n\nargument \"clustering\" {\n\t// comment + = \"Whether or not clustering should be enabled\"\n\toptional = true\n\tdefault + \ = false\n}\n\n/********************************************\n * Kubernetes Auto + Scrape ServiceMonitor\n ********************************************/\nprometheus.operator.servicemonitors + \"auto_scrape_servicemonitors\" {\n\tforward_to = argument.forward_to.value\n\n\tclustering + {\n\t\tenabled = argument.clustering.value\n\t}\n}\n\n/********************************************\n + * Kubernetes Auto Scrape PodMonitors\n ********************************************/\nprometheus.operator.podmonitors + \"auto_scrape_podmonitors\" {\n\tforward_to = argument.forward_to.value\n\n\tclustering + {\n\t\tenabled = argument.clustering.value\n\t}\n\n\tselector {\n\t\tmatch_expression + {\n\t\t\tkey = \"team\"\n\t\t\toperator = \"In\"\n\t\t\tvalues = [\"team-infra\"]\n\t\t}\n\t}\n}\n\n/********************************************\n + * Kubernetes Prometheus Rules To Mimir\n ********************************************/\nmimir.rules.kubernetes + \"prometheus_rules_to_mimir\" {\n\taddress = coalesce(env(\"METRICS_ENDPOINT\"), + \"http://nginx.gateway.svc:8080\")\n\ttenant_id = \"anonymous\"\n}\n" + profiles.river: 
"/*\nModule: profiles\nDescription: Wrapper module to include all + kubernetes profile modules and use cri parsing\n*/\nargument \"forward_to\" {\n\t// + comment = \"Must be a list(ProfilessReceiver) where collected logs should be forwarded + to\"\n\toptional = false\n}\n\nargument \"clustering\" {\n\t// comment = \"Whether + or not clustering should be enabled\"\n\toptional = true\n\tdefault = false\n}\n\ndiscovery.kubernetes + \"pyroscope_kubernetes\" {\n\trole = \"pod\"\n}\n\n// The default scrape config + allows to define annotations based scraping.\n//\n// For example the following + annotations:\n//\n// ```\n// profiles.grafana.com/memory.scrape: \"true\"\n// + profiles.grafana.com/memory.port: \"8080\"\n// profiles.grafana.com/cpu.scrape: + \"true\"\n// profiles.grafana.com/cpu.port: \"8080\"\n// profiles.grafana.com/goroutine.scrape: + \"true\"\n// profiles.grafana.com/goroutine.port: \"8080\"\n// ```\n//\n// will + scrape the `memory`, `cpu` and `goroutine` profiles from the `8080` port of the + pod.\n//\n// For more information see https://grafana.com/docs/phlare/latest/operators-guide/deploy-kubernetes/#optional-scrape-your-own-workloads-profiles\ndiscovery.relabel + \"kubernetes_pods\" {\n\ttargets = concat(discovery.kubernetes.pyroscope_kubernetes.targets)\n\n\trule + {\n\t\taction = \"drop\"\n\t\tsource_labels = [\"__meta_kubernetes_pod_phase\"]\n\t\tregex + \ = \"Pending|Succeeded|Failed|Completed\"\n\t}\n\n\trule {\n\t\taction + = \"labelmap\"\n\t\tregex = \"__meta_kubernetes_pod_label_(.+)\"\n\t}\n\n\trule + {\n\t\taction = \"replace\"\n\t\tsource_labels = [\"__meta_kubernetes_namespace\"]\n\t\ttarget_label + \ = \"namespace\"\n\t}\n\n\trule {\n\t\taction = \"replace\"\n\t\tsource_labels + = [\"__meta_kubernetes_pod_name\"]\n\t\ttarget_label = \"pod\"\n\t}\n\n\trule + {\n\t\taction = \"replace\"\n\t\tsource_labels = [\"__meta_kubernetes_pod_container_name\"]\n\t\ttarget_label + \ = \"container\"\n\t}\n}\n\ndiscovery.relabel 
\"kubernetes_pods_memory_default_name\" + {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_memory_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = 
\"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_memory_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Memory\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_memory\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_memory_default_name.output, discovery.relabel.kubernetes_pods_memory_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = true\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_cpu_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = 
\"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_cpu_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_cpu_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = 
\"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape CPU\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_cpu\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_cpu_default_name.output, discovery.relabel.kubernetes_pods_cpu_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_goroutine_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = 
\"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_goroutine_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_goroutine_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Goroutine\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_goroutine\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_goroutine_default_name.output, + discovery.relabel.kubernetes_pods_goroutine_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + 
= false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_block_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_block_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = 
[\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_block_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Block\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_block\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_block_default_name.output, discovery.relabel.kubernetes_pods_block_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_mutex_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = 
\"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_mutex_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = 
\"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_mutex_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Mutex\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_mutex\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + \ = concat(discovery.relabel.kubernetes_pods_mutex_default_name.output, discovery.relabel.kubernetes_pods_mutex_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = true\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = false\n\t\t}\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_fgprof_default_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port_name\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = 
\"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\ndiscovery.relabel + \"kubernetes_pods_fgprof_custom_name\" {\n\ttargets = concat(discovery.relabel.kubernetes_pods.output)\n\n\trule + {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scrape\"]\n\t\taction + \ = \"keep\"\n\t\tregex = \"true\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port_name\"]\n\t\taction + \ = \"drop\"\n\t\tregex = \"\"\n\t}\n\n\trule {\n\t\tsource_labels + = [\"__meta_kubernetes_pod_container_port_name\"]\n\t\ttarget_label = \"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port_name\"\n\t\taction + \ = \"keepequal\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_scheme\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(https?)\"\n\t\ttarget_label = \"__scheme__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_path\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+)\"\n\t\ttarget_label = \"__profile_path__\"\n\t\treplacement + \ = \"$1\"\n\t}\n\n\trule {\n\t\tsource_labels = [\"__address__\", \"__meta_kubernetes_pod_annotation_profiles_grafana_com_fgprof_port\"]\n\t\taction + \ = \"replace\"\n\t\tregex = \"(.+?)(?::\\\\d+)?;(\\\\d+)\"\n\t\ttarget_label + \ = \"__address__\"\n\t\treplacement = \"$1:$2\"\n\t}\n}\n\n/********************************************\n + * Kubernetes Pyroscope Scrape Fgprof\n ********************************************/\npyroscope.scrape + \"pyroscope_scrape_fgprof\" {\n\tclustering {\n\t\tenabled = argument.clustering.value\n\t}\n\n\ttargets + 
\ = concat(discovery.relabel.kubernetes_pods_fgprof_default_name.output, discovery.relabel.kubernetes_pods_fgprof_custom_name.output)\n\tforward_to + = argument.forward_to.value\n\n\tprofiling_config {\n\t\tprofile.memory {\n\t\t\tenabled + = false\n\t\t}\n\n\t\tprofile.process_cpu {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.goroutine + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.block {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.mutex + {\n\t\t\tenabled = false\n\t\t}\n\n\t\tprofile.fgprof {\n\t\t\tenabled = true\n\t\t}\n\t}\n}\n" + traces.river: "/*\nModule: traces\n*/\n\n/********************************************\n + * ARGUMENTS\n ********************************************/\nargument \"traces_forward_to\" + {\n\toptional = false\n}\n\nargument \"logs_forward_to\" {\n\toptional = false\n}\n\nargument + \"metrics_forward_to\" {\n\toptional = false\n}\n\nargument \"cluster\" {\n\toptional + = true\n\tdefault = \"k3d-k3s-codelab\"\n}\n\nargument \"otlp_http_endpoint\" + {\n\toptional = true\n\tdefault = \"0.0.0.0:4318\"\n}\n\nargument \"otlp_grpc_endpoint\" + {\n\toptional = true\n\tdefault = \"0.0.0.0:4317\"\n}\n\n/********************************************\n + * EXPORTS\n ********************************************/\nexport \"agent_traces_input\" + {\n\tvalue = otelcol.processor.batch.default.input\n}\n\n/********************************************\n + * Jaeger for Metrics Logs Traces\n ********************************************/\n\notelcol.receiver.jaeger + \"default\" {\n\tprotocols {\n\t\tgrpc {\n\t\t\tendpoint = \"0.0.0.0:14250\"\n\t\t}\n\n\t\tthrift_http + {\n\t\t\tendpoint = \"0.0.0.0:14268\"\n\t\t}\n\n\t\tthrift_binary {\n\t\t\tendpoint + = \"0.0.0.0:6832\"\n\t\t}\n\n\t\tthrift_compact {\n\t\t\tendpoint = \"0.0.0.0:6831\"\n\t\t}\n\t}\n\n\toutput + {\n\t\tmetrics = [otelcol.processor.batch.default.input]\n\t\tlogs = [otelcol.processor.resourcedetection.default.input]\n\t\ttraces + \ = 
[otelcol.processor.resourcedetection.default.input]\n\t}\n}\n\n/********************************************\n + * Otelcol for Metrics Logs Traces\n ********************************************/\n// + https://grafana.com/docs/agent/latest/flow/reference/components/otelcol.receiver.otlp/\notelcol.receiver.otlp + \"default\" {\n\tgrpc {\n\t\tendpoint = argument.otlp_grpc_endpoint.value\n\t}\n\n\thttp + {\n\t\tendpoint = argument.otlp_http_endpoint.value\n\t}\n\n\toutput {\n\t\tmetrics + = [otelcol.processor.batch.default.input]\n\t\tlogs = [otelcol.processor.resourcedetection.default.input]\n\t\ttraces + \ = [\n\t\t\totelcol.processor.resourcedetection.default.input,\n\t\t\totelcol.connector.spanlogs.autologging.input,\n\t\t]\n\t}\n}\n\notelcol.processor.resourcedetection + \"default\" {\n\tdetectors = [\"env\"]\n\n\toutput {\n\t\tlogs = [otelcol.processor.k8sattributes.default.input]\n\t\ttraces + = [otelcol.processor.k8sattributes.default.input]\n\t}\n}\n\notelcol.processor.k8sattributes + \"default\" {\n\textract {\n\t\tmetadata = [\n\t\t\t\"k8s.namespace.name\",\n\t\t\t\"k8s.pod.name\",\n\t\t\t\"k8s.deployment.name\",\n\t\t\t\"k8s.statefulset.name\",\n\t\t\t\"k8s.daemonset.name\",\n\t\t\t\"k8s.cronjob.name\",\n\t\t\t\"k8s.job.name\",\n\t\t\t\"k8s.node.name\",\n\t\t\t\"k8s.pod.uid\",\n\t\t\t\"k8s.pod.start_time\",\n\t\t]\n\t}\n\n\tpod_association + {\n\t\tsource {\n\t\t\tfrom = \"connection\"\n\t\t}\n\t}\n\n\toutput {\n\t\tlogs + \ = [otelcol.processor.transform.add_resource_attributes.input]\n\t\ttraces = + [otelcol.processor.transform.add_resource_attributes.input]\n\t}\n}\n\notelcol.processor.transform + \"add_resource_attributes\" {\n\terror_mode = \"ignore\"\n\n\tlog_statements {\n\t\tcontext + \ = \"resource\"\n\t\tstatements = [\n\t\t\t`set(attributes[\"pod\"], attributes[\"k8s.pod.name\"])`,\n\t\t\t`set(attributes[\"namespace\"], + attributes[\"k8s.namespace.name\"])`,\n\t\t\t`set(attributes[\"loki.resource.labels\"], + \"pod, namespace, cluster, 
job\")`,\n\t\t\t`set(attributes[\"k8s.cluster.name\"], + \"k3d-k3s-codelab\") where attributes[\"k8s.cluster.name\"] == nil`,\n\t\t]\n\t}\n\n\ttrace_statements + {\n\t\tcontext = \"resource\"\n\t\tstatements = [\n\t\t\t`set(attributes[\"k8s.cluster.name\"], + \"k3d-k3s-codelab\") where attributes[\"k8s.cluster.name\"] == nil`,\n\t\t]\n\t}\n\n\toutput + {\n\t\tlogs = [otelcol.processor.filter.default.input]\n\t\ttraces = [otelcol.processor.filter.default.input]\n\t}\n}\n\notelcol.processor.filter + \"default\" {\n\terror_mode = \"ignore\"\n\n\toutput {\n\t\tlogs = [otelcol.processor.batch.default.input]\n\t\ttraces + = [otelcol.processor.batch.default.input]\n\t}\n}\n\notelcol.processor.batch \"default\" + {\n\tsend_batch_size = 16384\n\tsend_batch_max_size = 0\n\ttimeout = + \"5s\"\n\n\toutput {\n\t\tmetrics = [otelcol.processor.memory_limiter.default.input]\n\t\tlogs + \ = [otelcol.processor.memory_limiter.default.input]\n\t\ttraces = [otelcol.processor.memory_limiter.default.input]\n\t}\n}\n\notelcol.processor.memory_limiter + \"default\" {\n\tcheck_interval = \"1s\"\n\tlimit_percentage = 50\n\tspike_limit_percentage + = 30\n\n\toutput {\n\t\tmetrics = [otelcol.exporter.prometheus.tracesmetrics.input]\n\t\tlogs + \ = [otelcol.exporter.loki.traceslogs.input]\n\t\ttraces = argument.traces_forward_to.value\n\t}\n}\n\notelcol.exporter.prometheus + \"tracesmetrics\" {\n\tforward_to = argument.metrics_forward_to.value\n}\n\notelcol.exporter.loki + \"traceslogs\" {\n\tforward_to = [loki.process.traceslogs.receiver]\n}\n\n// The + OpenTelemetry spanlog connector processes incoming trace spans and extracts data + from them ready\n// for logging.\notelcol.connector.spanlogs \"autologging\" {\n\t// + We only want to output a line for each root span (ie. 
every single trace), and + not for every\n\t// process or span (outputting a line for every span would be + extremely verbose).\n\tspans = false\n\troots = true\n\tprocesses = false\n\n\t// + We want to ensure that the following three span attributes are included in the + log line, if present.\n\tspan_attributes = [\n\t\t\"http.method\",\n\t\t\"http.target\",\n\t\t\"http.status_code\",\n\t]\n\n\t// + Overrides the default key in the log line to be `traceId`, which is then used + by Grafana to\n\t// identify the trace ID for correlation with the Tempo datasource.\n\toverrides + {\n\t\ttrace_id_key = \"traceId\"\n\t}\n\n\t// Send to the OpenTelemetry Loki + exporter.\n\toutput {\n\t\tlogs = [otelcol.exporter.loki.autologging.input]\n\t}\n}\n\n// + Simply forwards the incoming OpenTelemetry log format out as a Loki log.\n// We + need this stage to ensure we can then process the logline as a Loki object.\notelcol.exporter.loki + \"autologging\" {\n\tforward_to = [loki.process.autologging.receiver]\n}\n\n// + The Loki processor allows us to accept a correctly formatted Loki log and mutate + it into\n// a set of fields for output.\nloki.process \"autologging\" {\n\t// + The JSON stage simply extracts the `body` (the actual logline) from the Loki log, + ignoring\n\t// all other fields.\n\tstage.json {\n\t\texpressions = {\"body\" + = \"\"}\n\t}\n\t// The output stage takes the body (the main logline) and uses + this as the source for the output\n\t// logline. 
In this case, it essentially + turns it into logfmt.\n\tstage.output {\n\t\tsource = \"body\"\n\t}\n\n\tforward_to + = [loki.process.traceslogs.receiver]\n}\n\nloki.process \"traceslogs\" {\n\tstage.tenant + {\n\t\tvalue = \"anonymous\"\n\t}\n\n\tforward_to = argument.logs_forward_to.value\n}\n" kind: ConfigMap metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin - labels: - grafana_dashboard: "1" - name: agent-flow-opentelemetry.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - agent-flow-prometheus-remote-write.json: |- - { - "annotations": { - "list": [ - { - "datasource": "$loki_datasource", - "enable": true, - "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", - "iconColor": "rgba(0, 211, 255, 1)", - "instant": false, - "name": "Deployments", - "titleFormat": "{{cluster}}/{{namespace}}" - } - ] - }, - "graphTooltip": 1, - "links": [ - { - "icon": "doc", - "targetBlank": true, - "title": "Documentation", - "tooltip": "Component documentation", - "type": "link", - "url": "https://grafana.com/docs/agent/latest/flow/reference/components/prometheus.remote_write/" - }, - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "collapsed": false, - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "title": "prometheus.scrape", - "type": "row" - }, - { - "datasource": "${datasource}", - "description": "Percentage of targets successfully scraped by prometheus.scrape\ncomponents.\n\nThis metric is calculated by dividing the number of targets\nsuccessfully scraped by the total number of targets scraped,\nacross all the namespaces in the selected cluster.\n\nLow success rates can indicate a problem with 
scrape targets,\nstale service discovery, or agent misconfiguration.\n", - "fieldConfig": { - "defaults": { - "unit": "percentunit" - } - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 1 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(up{cluster=\"$cluster\"})\n/\ncount (up{cluster=\"$cluster\"})\n", - "instant": false, - "legendFormat": "% of targets successfully scraped", - "range": true - } - ], - "title": "Scrape success rate in $cluster", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Duration of successful scrapes by prometheus.scrape components,\nacross all the namespaces in the selected cluster.\n\nThis metric should be below your configured scrape interval.\nHigh durations can indicate a problem with a scrape target or\na performance issue with the agent.\n", - "fieldConfig": { - "defaults": { - "unit": "s" - } - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 1 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "quantile(0.99, scrape_duration_seconds{cluster=\"$cluster\"})\n", - "instant": false, - "legendFormat": "p99", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "quantile(0.95, scrape_duration_seconds{cluster=\"$cluster\"})\n", - "instant": false, - "legendFormat": "p95", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "quantile(0.50, scrape_duration_seconds{cluster=\"$cluster\"})\n", - "instant": false, - "legendFormat": "p50", - "range": true - } - ], - "title": "Scrape duration in $cluster", - "type": "timeseries" - }, - { - "collapsed": false, - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 11 - }, - "title": "prometheus.remote_write", - "type": "row" - }, - { - "datasource": "${datasource}", - "description": "How far behind prometheus.remote_write from samples recently written\nto the WAL.\n\nEach endpoint prometheus.remote_write is configured to send metrics\nhas its own 
delay. The time shown here is the sum across all\nendpoints for the given component.\n\nIt is normal for the WAL delay to be within 1-3 scrape intervals. If\nthe WAL delay continues to increase beyond that amount, try\nincreasing the number of maximum shards.\n", - "fieldConfig": { - "defaults": { - "unit": "s" - } - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 0, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (instance, component_id) (\n prometheus_remote_storage_highest_timestamp_in_seconds{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\"}\n - ignoring(url, remote_name) group_right(instance)\n prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "WAL delay", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate of data containing samples and metadata sent by\nprometheus.remote_write.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 20, - "gradientMode": "hue", - "stacking": { - "mode": "normal" - } - }, - "unit": "Bps" - } - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 6, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum without (remote_name, url) (\n rate(prometheus_remote_storage_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval]) +\n rate(prometheus_remote_storage_metadata_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "Data write 
throughput", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Latency of writes to the remote system made by\nprometheus.remote_write.\n", - "fieldConfig": { - "defaults": { - "unit": "s" - } - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 12, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "histogram_quantile(0.99, sum by (le) (\n rate(prometheus_remote_storage_sent_batch_duration_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n))\n", - "instant": false, - "legendFormat": "99th percentile", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "histogram_quantile(0.50, sum by (le) (\n rate(prometheus_remote_storage_sent_batch_duration_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n))\n", - "instant": false, - "legendFormat": "50th percentile", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "sum(rate(prometheus_remote_storage_sent_batch_duration_seconds_sum{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\", component_id=~\"$component\"}[$__rate_interval])) /\nsum(rate(prometheus_remote_storage_sent_batch_duration_seconds_count{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\", component_id=~\"$component\"}[$__rate_interval]))\n", - "instant": false, - "legendFormat": "Average", - "range": true - } - ], - "title": "Write latency", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Total number of shards which are concurrently sending samples read\nfrom the Write-Ahead Log.\n\nShards are bound to a minimum and maximum, displayed on the graph.\nThe lowest minimum and the highest maximum across all clients is\nshown.\n\nEach client has its own set of shards, minimum shards, and maximum\nshards; 
filter to a specific URL to display more granular\ninformation.\n", - "fieldConfig": { - "defaults": { - "unit": "none" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Minimum" - }, - "properties": [ - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 15 - ], - "fill": "dash" - } - }, - { - "id": "custom.showPoints", - "value": "never" - }, - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": false, - "viz": false - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Maximum" - }, - "properties": [ - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 15 - ], - "fill": "dash" - } - }, - { - "id": "custom.showPoints", - "value": "never" - }, - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": false, - "viz": false - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 18, - "y": 12 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum without (remote_name, url) (\n prometheus_remote_storage_shards{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "min (\n prometheus_remote_storage_shards_min{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}\n)\n", - "instant": false, - "legendFormat": "Minimum", - "range": true - }, - { - "datasource": "${datasource}", - "expr": "max (\n prometheus_remote_storage_shards_max{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}\n)\n", - "instant": false, - "legendFormat": "Maximum", - "range": true - } - ], - "title": "Shards", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Total outgoing samples sent by 
prometheus.remote_write.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 20, - "gradientMode": "hue", - "stacking": { - "mode": "normal" - } - }, - "unit": "cps" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 22 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum without (url, remote_name) (\n rate(prometheus_remote_storage_samples_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "Sent samples / second", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate of samples which prometheus.remote_write could not send due to\nnon-recoverable errors.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 20, - "gradientMode": "hue", - "stacking": { - "mode": "normal" - } - }, - "unit": "cps" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 22 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum without (url,remote_name) (\n rate(prometheus_remote_storage_samples_failed_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "Failed samples / second", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate of samples which prometheus.remote_write attempted to resend\nafter receiving a recoverable error.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 20, - "gradientMode": "hue", - "stacking": { - "mode": "normal" - } - }, - "unit": "cps" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 22 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum 
without (url,remote_name) (\n rate(prometheus_remote_storage_samples_retried_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n)\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "Retried samples / second", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Total number of active series across all components.\n\nAn \"active series\" is a series that prometheus.remote_write recently\nreceived a sample for. Active series are garbage collected whenever a\ntruncation of the WAL occurs.\n", - "fieldConfig": { - "defaults": { - "unit": "short" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 32 - }, - "options": { - "legend": { - "showLegend": false - } - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum(agent_wal_storage_active_series{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"})\n", - "instant": false, - "legendFormat": "Series", - "range": true - } - ], - "title": "Active series (total)", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Total number of active series which are currently being tracked by\nprometheus.remote_write components, with separate lines for each agent instance.\n\nAn \"active series\" is a series that prometheus.remote_write recently\nreceived a sample for. 
Active series are garbage collected whenever a\ntruncation of the WAL occurs.\n", - "fieldConfig": { - "defaults": { - "unit": "short" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 32 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "agent_wal_storage_active_series{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id!=\"\", component_id=~\"$component\", url=~\"$url\"}\n", - "instant": false, - "legendFormat": "{{instance}} / {{component_id}}", - "range": true - } - ], - "title": "Active series (by instance/component)", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Total number of active series which are currently being tracked by\nprometheus.remote_write components, aggregated across all instances.\n\nAn \"active series\" is a series that prometheus.remote_write recently\nreceived a sample for. Active series are garbage collected whenever a\ntruncation of the WAL occurs.\n", - "fieldConfig": { - "defaults": { - "unit": "short" - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 32 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "sum by (component_id) (agent_wal_storage_active_series{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id!=\"\", component_id=~\"$component\", url=~\"$url\"})\n", - "instant": false, - "legendFormat": "{{component_id}}", - "range": true - } - ], - "title": "Active series (by component)", - "type": "timeseries" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": 
"cluster", - "query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": "namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "allValue": ".*", - "datasource": "${datasource}", - "includeAll": true, - "label": "instance", - "multi": true, - "name": "instance", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n", - "refId": "instance" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "allValue": ".*", - "datasource": "${datasource}", - "includeAll": true, - "label": "component", - "multi": true, - "name": "component", - "query": { - "query": "label_values(agent_wal_samples_appended_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"prometheus\\\\.remote_write\\\\..*\"}, component_id)\n", - "refId": "component" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "allValue": ".*", - "datasource": "${datasource}", - "includeAll": true, - "label": "url", - "multi": true, - "name": "url", - "query": { - "query": "label_values(prometheus_remote_storage_sent_batch_duration_seconds_sum{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\"}, url)\n", - "refId": "url" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - 
"timezone": "utc", - "title": "Grafana Agent Flow / Prometheus Components", - "uid": "ee34ffa2d084547d650e1d96a26306aa" - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin - labels: - grafana_dashboard: "1" - name: agent-flow-prometheus-remote-write.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - agent-flow-resources.json: |- - { - "annotations": { - "list": [ - { - "datasource": "$loki_datasource", - "enable": true, - "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", - "iconColor": "rgba(0, 211, 255, 1)", - "instant": false, - "name": "Deployments", - "titleFormat": "{{cluster}}/{{namespace}}" - } - ] - }, - "graphTooltip": 1, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "grafana-agent-flow-mixin" - ], - "targetBlank": false, - "title": "Dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "${datasource}", - "description": "CPU usage of the Grafana Agent process relative to 1 CPU core.\n\nFor example, 100% means using one entire CPU core.\n", - "fieldConfig": { - "defaults": { - "unit": "percentunit" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(agent_resources_process_cpu_seconds_total{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[$__rate_interval])", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "CPU usage", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Resident memory size of the Grafana Agent process.\n", - "fieldConfig": { - "defaults": { - "unit": "decbytes" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 0 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": 
"agent_resources_process_resident_memory_bytes{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "Memory (RSS)", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate at which the Grafana Agent process performs garbage collections.\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "points", - "pointSize": 3 - }, - "unit": "ops" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 8 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(go_gc_duration_seconds_count{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[5m])\nand on(instance)\nagent_build_info{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\n", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "Garbage collections", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Number of goroutines which are running in parallel. 
An infinitely\ngrowing number of these indicates a goroutine leak.\n", - "fieldConfig": { - "defaults": { - "unit": "none" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 8 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "go_goroutines{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\nand on(instance)\nagent_build_info{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\n", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "Goroutines", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Heap memory currently in use by the Grafana Agent process.\n", - "fieldConfig": { - "defaults": { - "unit": "decbytes" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 8 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\nand on(instance)\nagent_build_info{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\n", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "Memory (heap inuse)", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate of data received across all network interfaces for the machine\nGrafana Agent is running on.\n\nData shown here is across all running processes and not exclusive to\nthe running Grafana Agent process.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 30, - "gradientMode": "none", - "stacking": { - "mode": "normal" - } - }, - "unit": "Bps" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 16 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(agent_resources_machine_rx_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{instance}}", - "range": 
true - } - ], - "title": "Network receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "description": "Rate of data sent across all network interfaces for the machine\nGrafana Agent is running on.\n\nData shown here is across all running processes and not exclusive to\nthe running Grafana Agent process.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 30, - "gradientMode": "none", - "stacking": { - "mode": "normal" - } - }, - "unit": "Bps" - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 16 - }, - "targets": [ - { - "datasource": "${datasource}", - "expr": "rate(agent_resources_machine_tx_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[$__rate_interval])\n", - "instant": false, - "legendFormat": "{{instance}}", - "range": true - } - ], - "title": "Network send bandwidth", - "type": "timeseries" - } - ], - "refresh": "10s", - "schemaVersion": 36, - "tags": [ - "grafana-agent-flow-mixin" - ], - "templating": { - "list": [ - { - "label": "Data Source", - "name": "datasource", - "query": "prometheus", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "label": "Loki Data Source", - "name": "loki_datasource", - "query": "loki", - "refresh": 1, - "sort": 2, - "type": "datasource" - }, - { - "datasource": "${datasource}", - "label": "cluster", - "name": "cluster", - "query": { - "query": "label_values(agent_component_controller_running_components, cluster)\n", - "refId": "cluster" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "datasource": "${datasource}", - "label": "namespace", - "name": "namespace", - "query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", - "refId": "namespace" - }, - "refresh": 2, - "sort": 2, - "type": "query" - }, - { - "allValue": ".*", - "datasource": "${datasource}", - "includeAll": true, - "label": "instance", - "multi": true, - "name": "instance", - 
"query": { - "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n", - "refId": "instance" - }, - "refresh": 2, - "sort": 2, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d", - "90d" - ] - }, - "timezone": "utc", - "title": "Grafana Agent Flow / Resources", - "uid": "d47aae5c53be5550f8e3bc8a904ba61a" - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Agent Flow Mixin - labels: - grafana_dashboard: "1" - name: agent-flow-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - go-runtime.json: |- - { - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "description": "Go runtime metrics", - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "id": 14, - "iteration": 1623758038990, - "links": [ ], - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average total bytes of memory reserved across all process instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 0 - }, - "hiddenSeries": false, - "id": 16, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - 
"pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by(job)(go_memstats_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}} (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Total Reserved Memory", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average stack memory usage across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 0 - }, - "hiddenSeries": false, - "id": 24, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job) (go_memstats_stack_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": 
"{{job}}: stack inuse (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Stack Memory Use", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average memory reservations by the runtime, not for stack or heap, across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 8 - }, - "hiddenSeries": false, - "id": 26, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(go_memstats_mspan_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{instance}}: mspan (avg)", - "refId": "B" - }, - { - "expr": "avg by (job)(go_memstats_mcache_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{instance}}: mcache (avg)", - "refId": "D" - }, - { - "expr": "avg by 
(job)(go_memstats_buck_hash_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{instance}}: buck hash (avg)", - "refId": "E" - }, - { - "expr": "avg by (job)(go_memstats_gc_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: gc (avg)", - "refId": "F" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Other Memory Reservations", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average memory reserved, and actually in use, by the heap, across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 8 - }, - "hiddenSeries": false, - "id": 12, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(go_memstats_heap_sys_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - 
"legendFormat": "{{job}}: heap reserved (avg)", - "refId": "B" - }, - { - "expr": "avg by (job)(go_memstats_heap_inuse_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: heap in use (avg)", - "refId": "A" - }, - { - "expr": "avg by (job)(go_memstats_heap_alloc_bytes{job=~\"tns_app\",instance=~\".*\"})", - "interval": "", - "legendFormat": "{{job}}: heap alloc (avg)", - "refId": "C" - }, - { - "expr": "avg by (job)(go_memstats_heap_idle_bytes{job=~\"tns_app\",instance=~\".*\"})", - "interval": "", - "legendFormat": "{{job}}: heap idle (avg)", - "refId": "D" - }, - { - "expr": "avg by (job)(go_memstats_heap_released_bytes{job=~\"tns_app\",instance=~\".*\"})", - "interval": "", - "legendFormat": "{{job}}: heap released (avg)", - "refId": "E" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Heap Memory", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average allocation rate in bytes per second, across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 16 - }, - "hiddenSeries": false, - "id": 14, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - 
"linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 1, - "points": true, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(rate(go_memstats_alloc_bytes_total{job=\"$job\", instance=~\"$instance\"}[$__rate_interval]))", - "interval": "", - "legendFormat": "{{job}}: bytes malloced/s (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Allocation Rate, Bytes", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average rate of heap object allocation, across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 16 - }, - "hiddenSeries": false, - "id": 20, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - 
"stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(go_memstats_mallocs_total{job=\"$job\", instance=~\"$instance\"}[$__rate_interval])", - "interval": "", - "legendFormat": "{{job}}: obj mallocs/s (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Heap Object Allocation Rate", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average number of live memory objects across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 24 - }, - "hiddenSeries": false, - "id": 22, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by(job)(go_memstats_mallocs_total{job=\"$job\", instance=~\"$instance\"} - go_memstats_frees_total{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - 
"legendFormat": "{{job}}: object count (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Number of Live Objects", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Average number of goroutines across instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 24 - }, - "hiddenSeries": false, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(go_goroutines{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: goroutine count (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Goroutines", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": 
"time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 32 - }, - "hiddenSeries": false, - "id": 4, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(go_gc_duration_seconds{quantile=\"0\", job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: min gc time (avg)", - "refId": "A" - }, - { - "expr": "avg by (job)(go_gc_duration_seconds{quantile=\"1\", job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}}: max gc time (avg)", - "refId": "B" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "GC min & max duration", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - 
"min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "The number used bytes at which the runtime plans to perform the next GC, averaged across all instances of a job.", - "fieldConfig": { - "defaults": { - "links": [ ] - }, - "overrides": [ ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 32 - }, - "hiddenSeries": false, - "id": 27, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg by (job)(go_memstats_next_gc_bytes{job=\"$job\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{job}} next gc bytes (avg)", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Next GC, Bytes", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "refresh": "30s", - "schemaVersion": 30, - "style": 
"dark", - "tags": [ - "go-runtime" - ], - "templating": { - "list": [ - { - "current": { - "selected": false, - "text": "MONITORING", - "value": "MONITORING" - }, - "description": null, - "error": null, - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "datasource", - "options": [ ], - "query": "prometheus", - "queryValue": "", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" - }, - { - "allValue": null, - "current": { - "selected": false, - "text": "pilot", - "value": "pilot" - }, - "datasource": "$datasource", - "definition": "label_values(go_info, job)", - "description": null, - "error": null, - "hide": 0, - "includeAll": false, - "label": "job", - "multi": false, - "name": "job", - "options": [ ], - "query": { - "query": "label_values(go_info, job)", - "refId": "MONITORING-job-Variable-Query" - }, - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": "", - "current": { - "selected": false, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "definition": "label_values(go_info{job=\"$job\"}, instance)", - "description": null, - "error": null, - "hide": 0, - "includeAll": true, - "label": "instance", - "multi": true, - "name": "instance", - "options": [ ], - "query": { - "query": "label_values(go_info{job=\"$job\"}, instance)", - "refId": "MONITORING-instance-Variable-Query" - }, - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-30m", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Go runtime 
metrics", - "uid": "T4sSTLBGzgp", - "version": 1 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Go - Runtime - labels: - grafana_dashboard: "1" - name: go-runtime.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - datasources.yaml: | - apiVersion: 1 - - deleteDatasources: - - name: Metrics - uid: metrics - - datasources: - # Mimir for metrics - - name: Metrics - type: prometheus - uid: metrics - access: proxy - url: http://nginx.gateway.svc.cluster.local:8080/prometheus - basicAuth: false - isDefault: true - version: 1 - editable: true -kind: ConfigMap -metadata: - labels: - grafana_datasource: "1" - name: grafana-datasources-5c8h5c4899 - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-alertmanager-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": 
{ - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - 
"value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"})", - "format": "time_series", - 
"legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alertmanager", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?alertmanager.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?alertmanager.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": 
"{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"alertmanager\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - 
"targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"alertmanager\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk reads", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Disk", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(alertmanager).*\"\n }\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - 
"repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Alertmanager resources", - "uid": "a6883fb22799ac74479c7db872451092", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-alertmanager-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-alertmanager.json: 
|- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "100px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cluster_job_pod:cortex_alertmanager_alerts:sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Total alerts", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - 
"datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cluster_job_pod:cortex_alertmanager_silences:sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Total silences", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max(cortex_alertmanager_tenants_discovered{cluster=~\"$cluster\", 
job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Tenants", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - 
"options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "QPS", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "nullPointMode": "null as zero", - 
"options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alertmanager Distributor", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ 
] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "sum(cluster_job:cortex_alertmanager_alerts_received_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n-\nsum(cluster_job:cortex_alertmanager_alerts_invalid_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(cluster_job:cortex_alertmanager_alerts_invalid_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "APS", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alerts received", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - 
"tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "cortex_alertmanager_dispatcher_aggregation_groups{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "per pod Active Aggregation Groups", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alerts grouping", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(cluster_job_integration:cortex_alertmanager_notifications_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n-\nsum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - 
"expr": "sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "NPS", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "(\nsum(cluster_job_integration:cortex_alertmanager_notifications_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}) by(integration)\n-\nsum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}) by(integration)\n) > 0\nor on () vector(0)\n", - "format": "time_series", - "legendFormat": "success - {{ integration }}", - "legendLink": null - }, - { - "expr": "sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}) by(integration)", - "format": "time_series", - "legendFormat": "failed - {{ integration }}", - "legendLink": null - } - ], - "title": "NPS by integration", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - 
"stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_alertmanager_notification_latency_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_alertmanager_notification_latency_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_alertmanager_notification_latency_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_alertmanager_notification_latency_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alert notifications", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - 
"fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Error rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - 
"mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - 
"mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alertmanager Configuration Object Store (Alertmanager accesses)", - "titleSize": "h6" - }, - { - 
"collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Get", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - 
"logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: GetRange", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true 
- }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Upload", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - 
"max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", 
- "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (cortex_alertmanager_tenants_owned{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Per pod tenants", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (cluster_job_pod:cortex_alertmanager_alerts:sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": 
"time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Per pod alerts", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (cluster_job_pod:cortex_alertmanager_silences:sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Per pod silences", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Replication", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 22, - "links": [ ], - 
"options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_alertmanager_sync_configs_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_alertmanager_sync_configs_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_alertmanager_sync_configs_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Syncs/sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(reason) (rate(cortex_alertmanager_sync_configs_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{reason}}", - "legendLink": null - } - ], - "title": "Syncs/sec (by reason)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - 
"spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum (rate(cortex_alertmanager_ring_check_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "errors", - "legendLink": null - } - ], - "title": "Ring check errors/sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Tenant configuration sync", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(outcome) (rate(cortex_alertmanager_state_initial_sync_completed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "interval": "1m", - "legendFormat": "{{outcome}}", - "legendLink": null - } - ], - "title": "Initial syncs /sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, 
- "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 26, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "interval": "1m", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "interval": "1m", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "interval": "1m", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Initial sync duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": 
{ - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 27, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_alertmanager_state_fetch_replica_state_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_alertmanager_state_fetch_replica_state_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "interval": "1m", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_alertmanager_state_fetch_replica_state_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "interval": "1m", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Fetch state from other alertmanagers /sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Sharding initial state sync", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - 
"custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 28, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(cluster_job:cortex_alertmanager_state_replication_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n-\nsum(cluster_job:cortex_alertmanager_state_replication_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(cluster_job:cortex_alertmanager_state_replication_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Replicate state to other alertmanagers /sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - 
}, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 29, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(cluster_job:cortex_alertmanager_partial_state_merges_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n-\nsum(cluster_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(cluster_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Merge state from other alertmanagers /sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": 
[ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 30, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_alertmanager_state_persist_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_alertmanager_state_persist_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_alertmanager_state_persist_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Persist state to remote storage /sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Sharding runtime state sync", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - 
}, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Alertmanager", - "uid": "b0d38d318bbddd80476246d4930f9e55", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-alertmanager.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-compactor-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - 
"mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - 
"mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "CPU and memory", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"compactor\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (RSS)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", - "format": "time_series", - "legendFormat": 
"{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?compactor.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - 
"overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?compactor.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"compactor\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - 
"lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"compactor\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk reads", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(compactor).*\"\n 
}\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Disk", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Compactor resources", - "uid": "09a5c49e9cdb2f2b24c6d184574a07fd", - "version": 0 - } -kind: 
ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-compactor-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-compactor.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Per-instance runs\nNumber of times a compactor instance triggers a compaction across all tenants that it manages.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "bars", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "completed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "started" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#34CCEB", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" 
- } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_compactor_runs_started_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "started", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_compactor_runs_completed_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "completed", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_compactor_runs_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Per-instance runs / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Tenants compaction progress\nIn a multi-tenant cluster, display the progress of tenants that are compacted while compaction is running.\n\n", - "fieldConfig": { - "defaults": { - "max": 1, - "noValue": 1, - "unit": "percentunit" - } - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "(\n cortex_compactor_tenants_processing_succeeded{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"} +\n cortex_compactor_tenants_processing_failed{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"} +\n cortex_compactor_tenants_skipped{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}\n)\n/\ncortex_compactor_tenants_discovered{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"} > 0\n", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - 
"title": "Tenants compaction progress", - "type": "timeseries" - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Longest time since last successful run\nDisplays the amount of time since the most recent successful execution\nof the compactor.\nThe value shown will be for the compactor replica that has the longest time since its\nlast successful run.\nThe table to the right shows a summary for all compactor replicas.\n\nIf there is no time value, one of the following messages might appear:\n\n- If you see \"No compactor data\" in this panel, that means that no compactors are active yet.\n\n- If you see \"No successful runs\" in this panel, that means that compactors are active, but none\n of them were successfully executed yet.\n\nThese might be expected - for example, if you just recently restarted your compactors,\nthey might not have had a chance to complete their first compaction run.\nHowever, if these messages persist, you should check the health of your compactors.\n\n", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "No compactor data", - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Last run" - }, - "properties": [ - { - "id": "custom.width", - "value": 74 - }, - { - "id": "mappings", - "value": [ - { - "options": { - "from": "-Infinity", - "result": { - "color": "text", - "text": "No successful runs since startup yet" - }, - "to": 0 - }, - "type": "range" - } - ] - }, - { - "id": "color", - "value": { - "mode": "thresholds" - } - }, - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "yellow", - "value": 7200 - }, - { - "color": "orange", - "value": 21600 - }, - { - "color": "red", - "value": 43200 - } - ] - } - } - ] - } - ] - }, - 
"fill": 1, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "reduceOptions": { - "calcs": [ - "first" - ], - "fields": "/^Last run$/", - "values": false - }, - "textMode": "value" - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max by(pod)\n(\n (time() * (max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[1h]) !=bool 0))\n -\n max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[1h])\n)\n", - "format": "table", - "instant": true, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Longest time since last successful run", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transformations": [ - { - "id": "organize", - "options": { - "renameByName": { - "Value": "Last run", - "pod": "Compactor" - } - } - }, - { - "id": "sortBy", - "options": { - "sort": [ - { - "desc": true, - "field": "Last run" - } - ] - } - } - ], - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### 
Last successful run per-compactor replica\nDisplays the compactor replicas, and for each, shows how long it has been since\nits last successful compaction run.\n\nThe value in the status column is based on how long it has been since the last successful compaction.\n\n- Okay: less than 2 hours\n- Delayed: more than 2 hours\n- Late: more than 6 hours\n- Very late: more than 12 hours\n\nIf the status of any compactor replicas are *Late* or *Very late*, check their health.\n\n", - "fieldConfig": { - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Status" - }, - "properties": [ - { - "id": "custom.displayMode", - "value": "color-background" - }, - { - "id": "mappings", - "value": [ - { - "options": { - "from": "-Infinity", - "result": { - "color": "transparent", - "text": "N/A" - }, - "to": 0 - }, - "type": "range" - }, - { - "options": { - "from": 0, - "result": { - "color": "green", - "text": "Ok" - }, - "to": 7200 - }, - "type": "range" - }, - { - "options": { - "from": 7200, - "result": { - "color": "yellow", - "text": "Delayed" - }, - "to": 21600 - }, - "type": "range" - }, - { - "options": { - "from": 21600, - "result": { - "color": "orange", - "text": "Late" - }, - "to": 43200 - }, - "type": "range" - }, - { - "options": { - "from": 43200, - "result": { - "color": "red", - "text": "Very late" - }, - "to": "Infinity" - }, - "type": "range" - }, - { - "options": { - "match": "null+nan", - "result": { - "color": "transparent", - "text": "Unknown" - } - }, - "type": "special" - } - ] - }, - { - "id": "custom.width", - "value": 86 - }, - { - "id": "custom.align", - "value": "center" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Last run" - }, - "properties": [ - { - "id": "unit", - "value": "s" - }, - { - "id": "custom.width", - "value": 74 - }, - { - "id": "mappings", - "value": [ - { - "options": { - "from": "-Infinity", - "result": { - "text": "Never" - }, - "to": 0 - }, - "type": "range" - } - ] - } - ] - } - ] - }, - "id": 4, - 
"links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "max by(pod)\n(\n (time() * (max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[1h]) !=bool 0))\n -\n max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[1h])\n)\n", - "format": "table", - "instant": true, - "legendFormat": "Last run", - "legendLink": null - } - ], - "title": "Last successful run per-compactor replica", - "transformations": [ - { - "id": "organize", - "options": { - "renameByName": { - "Value": "Last run", - "pod": "Compactor" - } - } - }, - { - "id": "sortBy", - "options": { - "sort": [ - { - "desc": true, - "field": "Last run" - } - ] - } - }, - { - "id": "calculateField", - "options": { - "alias": "One", - "binary": { - "left": "Last run", - "operator": "/", - "right": "Last run" - }, - "mode": "binary", - "replaceFields": false - } - }, - { - "id": "calculateField", - "options": { - "alias": "Status", - "binary": { - "left": "Last run", - "operator": "*", - "right": "One" - }, - "mode": "binary", - "replaceFields": false - } - }, - { - "id": "filterFieldsByName", - "options": { - "include": { - "names": [ - "Compactor", - "Last run", - "Status" - ] - } - } - } - ], - "type": "table" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Summary", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Estimated Compaction Jobs\nEstimated number of compaction jobs based on latest version of bucket index. 
Ingesters upload new blocks every 2 hours (shortly after 01:00 UTC, 03:00 UTC, 05:00 UTC, etc.),\nand compactors should process all of them within 2h interval. If this graph regularly goes to zero (or close to zero) in 2 hour intervals, then compaction works as designed.\n\nMetric with number of compaction jobs is computed from blocks in bucket index, which is updated regularly. Metric doesn't change between bucket index updates, even if\nthere were compaction jobs finished in this time. When computing compaction jobs, only jobs that can be executed at given moment are counted. There can be more\njobs, but if they are blocked, they are not counted in the metric. For example if there is a split compaction job pending for some time range, no merge job\ncovering the same time range can run. In this case only split compaction job is counted toward the metric, but merge job isn't.\n\nIn other words, computed number of compaction jobs is the minimum number of compaction jobs based on latest version of bucket index.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(cortex_bucket_index_estimated_compaction_jobs{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}) and (sum(rate(cortex_bucket_index_estimated_compaction_jobs_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) == 0)", - "format": "time_series", - "legendFormat": "Jobs", - "legendLink": null - } - ], - "title": 
"Estimated Compaction Jobs", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### TSDB compactions / sec\nRate of TSDB compactions. Single TSDB compaction takes one or more input blocks and produces one or more (during \"split\" phase) output blocks.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(prometheus_tsdb_compactions_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "compactions", - "legendLink": null - } - ], - "title": "TSDB compactions / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### TSDB compaction duration\nDisplay the amount of time that it has taken to run a single TSDB compaction.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(prometheus_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(prometheus_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(prometheus_tsdb_compaction_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(prometheus_tsdb_compaction_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "TSDB compaction duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - 
"expr": "avg(max by(user) (cortex_bucket_blocks_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - } - ], - "title": "Average blocks / tenant", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Tenants with largest number of blocks\nThe 10 tenants with the largest number of blocks.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "topk(10, max by(user) (cortex_bucket_blocks_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "Tenants with largest number of blocks", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - 
"sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_compactor_blocks_marked_for_deletion_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "blocks", - "legendLink": null - } - ], - "title": "Blocks marked for deletion / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_compactor_blocks_cleaned_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_compactor_block_cleanup_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Blocks deletions / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - 
"showTitle": true, - "title": "Garbage collector", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_compactor_meta_syncs_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_compactor_meta_sync_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_compactor_meta_sync_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Metadata syncs / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", 
- "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_compactor_meta_sync_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_compactor_meta_sync_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_compactor_meta_sync_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_compactor_meta_sync_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Metadata sync duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Metadata sync", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": 
"$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Error rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", 
- "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": 
"absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Object Store", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - 
"fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Get", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - 
"lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: GetRange", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 
1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Upload", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - 
"lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 21, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - 
"collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": 
"fixed" - } - } - ] - } - ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Key-value store for compactors ring", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", 
- "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Compactor", - "uid": "1b3443aea86db629e6efdb7d05c53823", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-compactor.json + name: agent-modules-cf8t5bf7t9 namespace: monitoring-system --- apiVersion: v1 @@ -10339,33181 +787,120 @@ metadata: --- apiVersion: v1 data: - mimir-config.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "instances" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - 
{ - "expr": "count(cortex_config_hash{cluster=~\"$cluster\", namespace=~\"$namespace\"}) by (sha256)", - "format": "time_series", - "legendFormat": "sha256:{{sha256}}", - "legendLink": null - } - ], - "title": "Startup config file hashes", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Startup config file", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "instances" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "count(cortex_runtime_config_hash{cluster=~\"$cluster\", namespace=~\"$namespace\"}) by (sha256)", - "format": "time_series", - "legendFormat": "sha256:{{sha256}}", - "legendLink": null - } - ], - "title": "Runtime config file hashes", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Runtime config file", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", 
- "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Config", - "uid": "5d9d0b4724c0f80d68467088ec61e003", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-config.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-object-store.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": 
{ - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(component) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{component}}", - "legendLink": null - } - ], - "title": "RPS / component", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(component) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) / sum by(component) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{component}}", - "legendLink": null - } - ], - "title": "Error rate / component", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Components", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { 
- "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "RPS / operation", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Error rate / operation", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Operations", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - 
"id": 5, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: Get", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - 
"mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: GetRange", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": 
"histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": 
"single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": 
"histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: Upload", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", 
namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Op: Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - 
"tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Object Store", - "uid": "e1324ee2a434f4158c00a9ee279d3292", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-object-store.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-overrides.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "${datasource}", - "id": 1, - "span": 12, - "targets": [ - { - "expr": "max by(limit_name) (cortex_limits_defaults{cluster=~\"$cluster\",namespace=~\"$namespace\"})", - "instant": true, - "legendFormat": "", - "refId": "A" 
- } - ], - "title": "Defaults", - "transformations": [ - { - "id": "labelsToFields", - "options": { } - }, - { - "id": "merge", - "options": { } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true - }, - "indexByName": { - "Value": 1, - "limit_name": 0 - } - } - }, - { - "id": "sortBy", - "options": { - "fields": { }, - "sort": [ - { - "field": "limit_name" - } - ] - } - } - ], - "type": "table" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "${datasource}", - "id": 2, - "span": 12, - "targets": [ - { - "expr": "max by(user, limit_name) (cortex_limits_overrides{cluster=~\"$cluster\",namespace=~\"$namespace\",user=~\"${tenant_id}\"})", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "title": "Per-tenant overrides", - "transformations": [ - { - "id": "labelsToFields", - "options": { - "mode": "columns", - "valueLabel": "limit_name" - } - }, - { - "id": "merge", - "options": { } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true - }, - "indexByName": { - "user": 0 - } - } - } - ], - "type": "table" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, 
cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "current": { - "selected": true, - "text": ".*", - "value": ".*" - }, - "hide": 0, - "label": "Tenant ID", - "name": "tenant_id", - "options": [ - { - "selected": true, - "text": ".*", - "value": ".*" - } - ], - "query": ".*", - "type": "textbox" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Overrides", - "uid": "1e2c358600ac53f09faea133f811b5bb", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-overrides.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-overview-networking.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - 
"refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - 
"defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Writes", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - 
"overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - 
"showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Reads", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": 
"single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - 
"tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}))", - "format": "time_series", - 
"legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Backend", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - 
"time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Overview networking", - "uid": "e15c71d372cc541367a088f10d9fcd92", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-overview-networking.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-overview-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": 
{ - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Writes", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, 
- "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"distributor|ingester|mimir-write\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"distributor|ingester|mimir-write\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n 
\"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk reads", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(distributor|ingester|mimir-write).*\"\n }\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - 
"legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) 
(go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Reads", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - 
} - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Backend", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - 
"options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": 
null - } - ], - "title": "Disk reads", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"\n }\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - 
"name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Overview resources", - "uid": "a9b92d3c4d1af325d872a9e9a7083d71", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-overview-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-overview.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "content": "The 'Status' panel shows an overview on the cluster health over the 
time.\nTo investigate failures, see a specific dashboard:\n\n- Writes\n- Reads\n- Rule evaluations\n- Alerting notifications\n- Object storage\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 3, - "title": "", - "transparent": true, - "type": "text" - }, - { - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#7EB26D", - "value": null - }, - { - "color": "#EAB839", - "value": 0.01 - }, - { - "color": "#E24D42", - "value": 0.050000000000000003 - } - ] - } - } - }, - "id": 2, - "options": { - "showValue": "never" - }, - "span": 6, - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "exemplar": false, - "expr": "(\n # gRPC errors are not tracked as 5xx but \"error\".\n sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"5.*|error\"}[$__rate_interval]))\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", - "instant": false, - "legendFormat": "Writes", - "range": true - }, - { - "datasource": { - "uid": "$datasource" - }, - "exemplar": false, - "expr": "(\n sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"5.*\"}[$__rate_interval]))\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", - "instant": false, - "legendFormat": "Reads", - "range": true - }, - { - "datasource": { - "uid": "$datasource" - }, - "exemplar": false, - "expr": "(\n (\n sum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n +\n # Consider missed evaluations as failures.\n sum(rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n )\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "instant": false, - "legendFormat": "Rule evaluations", - "range": true - }, - { - "datasource": { - "uid": "$datasource" - }, - "exemplar": false, - "expr": "(\n # Failed notifications from ruler to Alertmanager (handling the case the ruler metrics are missing).\n ((sum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n) or vector(0))\n +\n # Failed notifications from Alertmanager to receivers (handling the case the alertmanager metrics are missing).\n ((sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n) or vector(0))\n)\n/\n(\n # Total notifications from ruler to Alertmanager (handling the case the ruler metrics are missing).\n ((sum(rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n) or vector(0))\n +\n # Total notifications from 
Alertmanager to receivers (handling the case the alertmanager metrics are missing).\n ((sum(cluster_job_integration:cortex_alertmanager_notifications_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n) or vector(0))\n)\n", - "instant": false, - "legendFormat": "Alerting notifications", - "range": true - }, - { - "datasource": { - "uid": "$datasource" - }, - "exemplar": false, - "expr": "sum(rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n/\nsum(rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n", - "instant": false, - "legendFormat": "Object storage", - "range": true - } - ], - "title": "Status", - "type": "state-timeline" - }, - { - "id": 3, - "options": { - "alertInstanceLabelFilter": "cluster=~\"$cluster\", namespace=~\"$namespace\"", - "alertName": "Mimir", - "dashboardAlerts": false, - "maxItems": 100, - "sortOrder": 3, - "stateFilter": { - "error": true, - "firing": true, - "noData": false, - "normal": false, - "pending": false - } - }, - "span": 3, - "title": "Firing alerts", - "type": "alertlist" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Mimir cluster health", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "content": "These panels show an overview on the write path. 
\nTo examine the write path in detail, see a specific dashboard:\n\n- Writes\n- Writes resources\n- Writes networking\n- Overview resources\n- Overview networking\n", - "datasource": null, - "description": "", - "id": 4, - "mode": "markdown", - "span": 3, - "title": "", - "transparent": true, - "type": "text" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Write requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th 
percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Write latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "cps" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_samples:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "samples / sec", - "legendLink": null - }, - { - "expr": 
"sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "exemplars / sec", - "legendLink": null - } - ], - "title": "Ingestion / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Writes", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "content": "These panels show an overview on the read path. \nTo examine the read path in detail, see a specific dashboard:\n\n- Reads\n- Reads resources\n- Reads networking\n- Overview resources\n- Overview networking\n- Queries\n- Compactor\n", - "datasource": null, - "description": "", - "id": 8, - "mode": "markdown", - "span": 3, - "title": "", - "transparent": true, - "type": "text" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - 
}, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Read requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 10, - 
"links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Read latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, 
- "seriesOverrides": [ - { - "alias": "instant queries", - "color": "#429D48" - }, - { - "alias": "range queries", - "color": "#F1C731" - }, - { - "alias": "\"label names\" queries", - "color": "#2A66CF" - }, - { - "alias": "\"label values\" queries", - "color": "#9E44C1" - }, - { - "alias": "series queries", - "color": "#FFAB57" - }, - { - "alias": "remote read queries", - "color": "#C79424" - }, - { - "alias": "metadata queries", - "color": "#84D586" - }, - { - "alias": "exemplar queries", - "color": "#A1C4FC" - }, - { - "alias": "\"active series\" queries", - "color": "#C788DE" - } - ], - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_query\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "instant queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_query_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "range queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_labels\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "\"label names\" queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_label_name_values\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "\"label values\" queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_series\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "series queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_read\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "remote read queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_metadata\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "metadata queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_query_exemplars\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "exemplar queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=\"prometheus_api_v1_cardinality_active_series\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "\"active series\" queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=\"prometheus_api_v1_cardinality_label_names\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "\"label name cardinality\" queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=\"prometheus_api_v1_cardinality_label_values\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "\"label value cardinality\" queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_.*\",route!~\".*(query|query_range|label.*|series|read|metadata|query_exemplars|cardinality_.*)\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "other", - "legendLink": null - } - ], - "title": "Queries / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Reads", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "content": "These panels show an overview on the recording and alerting rules evaluation.\nTo examine the rules evaluation and alerts notifications in detail, see a specific dashboard:\n\n- Ruler\n- Alertmanager\n- Alertmanager resources\n- Overview resources\n- Overview networking\n", - "datasource": null, - "description": "", - "id": 12, - "mode": "markdown", - "span": 3, - "title": "", - "transparent": true, - "type": "text" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - 
"id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "success", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "missed", - "legendLink": null - } - ], - "title": "Rule evaluations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum (rate(cortex_prometheus_rule_evaluation_duration_seconds_sum{cluster=~\"$cluster\", 
job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum (rate(cortex_prometheus_rule_evaluation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "average", - "legendLink": null - } - ], - "title": "Rule evaluations latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n -\nsum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "failed", - 
"legendLink": null - } - ], - "title": "Alerting notifications sent to Alertmanager / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Recording and alerting rules", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "content": "These panels show an overview on the long-term storage (object storage).\nTo examine the storage in detail, see a specific dashboard:\n\n- Object store\n- Compactor\n", - "datasource": null, - "description": "", - "id": 16, - "mode": "markdown", - "span": 3, - "title": "", - "transparent": true, - "type": "text" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n-\nsum(rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": 
"sum(rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "seriesOverrides": [ - { - "alias": "attributes", - "color": "#429D48" - }, - { - "alias": "delete", - "color": "#F1C731" - }, - { - "alias": "exists", - "color": "#2A66CF" - }, - { - "alias": "get", - "color": "#9E44C1" - }, - { - "alias": "get_range", - "color": "#FFAB57" - }, - { - "alias": "iter", - "color": "#C79424" - }, - { - "alias": "upload", - "color": "#84D586" - } - ], - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - 
"tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(max by(user) (max_over_time(cortex_bucket_blocks_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[15m])))", - "format": "time_series", - "legendFormat": "blocks", - "legendLink": null - } - ], - "title": "Total number of blocks in the storage", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Long-term storage (object storage)", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", 
- "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Overview", - "uid": "ffcd83628d7d4b5a03d1cafd159e6c9c", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-overview.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-queries.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, 
sum(rate(cortex_query_frontend_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_query_frontend_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_frontend_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Queue duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_query_frontend_retries_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, 
sum(rate(cortex_query_frontend_retries_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_query_frontend_retries_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_query_frontend_retries_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Retries", - "type": "timeseries", - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (cortex_query_frontend_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Queue length (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - 
"showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(user) (cortex_query_frontend_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}) > 0", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "Queue length (per user)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Queue duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Queue length (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": 
"A", - "mode": "none" - } - }, - "min": 0, - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(user) (cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}) > 0", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "Queue length (per user)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Intervals per query\nThe average number of split queries (partitioned by time) executed a single input query.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_frontend_split_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) / sum(rate(cortex_frontend_query_range_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", method=\"split_by_interval_and_results_cache\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": 
"splitting rate", - "legendLink": null - } - ], - "title": "Intervals per query", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "# Query the new metric introduced in Mimir 2.10.\n(\n sum by(request_type) (rate(cortex_frontend_query_result_cache_hits_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))\n /\n sum by(request_type) (rate(cortex_frontend_query_result_cache_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))\n)\n# Otherwise fallback to the previous general-purpose metrics.\nor\n(\n label_replace(\n # Query metrics before and after dskit cache refactor.\n sum (\n rate(thanos_cache_memcached_hits_total{name=\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_hits_total{name=\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n )\n /\n sum (\n rate(thanos_cache_memcached_requests_total{name=~\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_requests_total{name=~\"frontend-cache\", cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n ),\n \"request_type\", \"query_range\", \"\", \"\")\n)\n", - "format": "time_series", - "legendFormat": "{{request_type}}", - "legendLink": null - } - ], - "title": "Query results cache hit ratio", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Query results cache skipped\nThe % of queries whose results could not be cached.\nIt is tracked for each split query when the splitting by interval is enabled.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_frontend_query_result_cache_skipped_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (reason) /\nignoring (reason) group_left sum(rate(cortex_frontend_query_result_cache_attempted_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "{{reason}}", - "legendLink": null - } - ], - "title": "Query results cache skipped", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend - query splitting and results cache", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Sharded queries ratio\nThe % of queries that 
have been successfully rewritten and executed in a shardable way.\nThis panel only takes into account the type of queries that are supported by query sharding (eg. range queries).\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_frontend_query_sharding_rewrites_succeeded_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) /\nsum(rate(cortex_frontend_query_sharding_rewrites_attempted_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "sharded queries ratio", - "legendLink": null - } - ], - "title": "Sharded queries ratio", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Number of sharded queries per query\nThe number of sharded queries that have been executed for a single input query. 
It only tracks queries that\nhave been successfully rewritten in a shardable way.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_frontend_sharded_queries_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_frontend_sharded_queries_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_frontend_sharded_queries_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_frontend_sharded_queries_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Number of sharded queries per query", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": 
false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend - query sharding", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:cortex_ingester_queried_series_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job:cortex_ingester_queried_series_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1 * sum(cluster_job:cortex_ingester_queried_series_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}) / sum(cluster_job:cortex_ingester_queried_series_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Series per query", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - 
"fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:cortex_ingester_queried_samples_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job:cortex_ingester_queried_samples_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1 * sum(cluster_job:cortex_ingester_queried_samples_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}) / sum(cluster_job:cortex_ingester_queried_samples_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Samples per query", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "nullPointMode": "null as zero", - "options": 
{ - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:cortex_ingester_queried_exemplars_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job:cortex_ingester_queried_exemplars_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1 * sum(cluster_job:cortex_ingester_queried_exemplars_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}) / sum(cluster_job:cortex_ingester_queried_exemplars_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Exemplars per query", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - 
"expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_querier_storegateway_instances_hit_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_instances_hit_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Number of store-gateways hit per query", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": 
"histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_querier_storegateway_refetches_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_refetches_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Refetches of missing blocks per query", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Consistency checks failed\nRate of queries that had to run with consistency checks and those checks failed. 
A failed consistency check means that some of at least one block which had to be queried wasn't present in any of the store-gateways.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Failure Rate" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_querier_blocks_consistency_checks_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) / sum(rate(cortex_querier_blocks_consistency_checks_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Failure Rate", - "legendLink": null - } - ], - "title": "Consistency checks failed", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Rejected queries\nThe proportion of all queries received by queriers that were rejected for some reason.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - 
"tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_querier_queries_rejected_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) / ignoring (reason) group_left sum(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_query(_range)?\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{reason}}", - "legendLink": null - } - ], - "title": "Rejected queries", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max(cortex_bucket_index_loaded{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"})", - "format": "time_series", - "legendFormat": "Max", - "legendLink": null - }, - { - "expr": "min(cortex_bucket_index_loaded{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"})", - "format": "time_series", - "legendFormat": "Min", - "legendLink": null - }, - { - "expr": "avg(cortex_bucket_index_loaded{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"})", - "format": 
"time_series", - "legendFormat": "Average", - "legendLink": null - } - ], - "title": "Bucket indexes loaded (per querier)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_bucket_index_loads_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) - sum(rate(cortex_bucket_index_load_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_bucket_index_load_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Bucket indexes load / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - 
"spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_bucket_index_load_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_bucket_index_load_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_bucket_index_load_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_bucket_index_load_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Bucket indexes load latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": 
{ - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_bucket_store_series_blocks_queried_sum{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "blocks", - "legendLink": null - } - ], - "title": "Blocks queried / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "binBps" - }, - "overrides": [ ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(data_type) (\n # Exclude \"chunks refetched\".\n rate(cortex_bucket_store_series_data_size_fetched_bytes_sum{component=\"store-gateway\", stage!=\"refetched\", cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "{{data_type}}", - "legendLink": null - } - ], - "title": "Data fetched / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 
100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "binBps" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(data_type) (\n # Exclude \"chunks processed\" to only count \"chunks returned\", other than postings and series.\n rate(cortex_bucket_store_series_data_size_touched_bytes_sum{component=\"store-gateway\", stage!=\"processed\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "{{data_type}}", - "legendLink": null - } - ], - "title": "Data touched / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Store-gateway", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 26, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(stage) (rate(cortex_bucket_store_series_request_stage_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum by(stage) 
(rate(cortex_bucket_store_series_request_stage_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "{{stage}}", - "legendLink": null - } - ], - "title": "Series request average latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 27, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by(stage, le) (rate(cortex_bucket_store_series_request_stage_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])))\n", - "format": "time_series", - "legendFormat": "{{stage}}", - "legendLink": null - } - ], - "title": "Series request 99th percentile latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Series batch preloading efficiency\nThis panel shows the % of time reduced by preloading, for Series() requests which have been\nsplit to 2+ batches. 
If a Series() request is served within a single batch, then preloading\nis not triggered, and thus not counted in this measurement.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 28, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "# Clamping min to 0 because if preloading not useful at all, then the actual value we get is\n# slightly negative because of the small overhead introduced by preloading.\nclamp_min(1 - (\n sum(rate(cortex_bucket_store_series_batch_preloading_wait_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\n sum(rate(cortex_bucket_store_series_batch_preloading_load_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n), 0)\n", - "format": "time_series", - "legendFormat": "% of time reduced by preloading", - "legendLink": null - } - ], - "title": "Series batch preloading efficiency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Blocks currently owned\nThis panel shows the number of blocks owned by each store-gateway replica.\nFor each owned block, the store-gateway keeps its index-header on disk, and\neventually loaded in memory (if index-header lazy loading is disabled, or lazy loading\nis 
enabled and the index-header was loaded).\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 29, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "cortex_bucket_store_blocks_loaded{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Blocks currently owned", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 30, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_bucket_store_block_loads_total{component=\"store-gateway\",cluster=~\"$cluster\", 
job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) - sum(rate(cortex_bucket_store_block_load_failures_total{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_bucket_store_block_load_failures_total{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Blocks loaded / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 31, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_bucket_store_block_drops_total{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) - sum(rate(cortex_bucket_store_block_drop_failures_total{component=\"store-gateway\",cluster=~\"$cluster\", 
job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_bucket_store_block_drop_failures_total{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Blocks dropped / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 32, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "cortex_bucket_store_indexheader_lazy_load_total{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"} - cortex_bucket_store_indexheader_lazy_unload_total{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Lazy loaded index-headers", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - 
"mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 33, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Index-header lazy load duration", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Index-header lazy load gate latency\nTime spent waiting for a turn to load an index header. 
This time is not included in \"Index-header lazy load duration.\"\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 34, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_bucket_stores_gate_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_bucket_stores_gate_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_bucket_stores_gate_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_bucket_stores_gate_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Index-header lazy load gate latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": 
"short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 35, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_bucket_store_series_hash_cache_hits_total{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum(rate(cortex_bucket_store_series_hash_cache_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "hit ratio", - "legendLink": null - } - ], - "title": "Series hash cache hit ratio", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 36, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": 
"sum(rate(thanos_store_index_cache_hits_total{item_type=\"ExpandedPostings\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum(rate(thanos_store_index_cache_requests_total{item_type=\"ExpandedPostings\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "hit ratio", - "legendLink": null - } - ], - "title": "ExpandedPostings cache hit ratio", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 37, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_cache_memory_hits_total{name=\"chunks-attributes-cache\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum(rate(cortex_cache_memory_requests_total{name=\"chunks-attributes-cache\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "hit ratio", - "legendLink": null - } - ], - "title": "Chunks attributes in-memory cache hit ratio", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": 
"default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Queries", - "uid": "b3abe8d5c040395cc36615cb4334c92d", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-queries.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-reads-networking.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - 
"links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": 
true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Summary", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": 
"{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"})", - "format": "time_series", - "legendFormat": "highest", - 
"legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": 
"Bps" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - 
"span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler", - 
"titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - 
"fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": 
"min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) 
(rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - 
"expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Store-gateway", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": 
"never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": 
"never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ruler", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - 
"allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Reads networking", - "uid": "54b2a0a4748b3bd1aefa92ce5559a1c2", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-reads-networking.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-reads-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - 
"steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": 
"multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Summary", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - 
"legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - 
"legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - 
"matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - 
"overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - 
"showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - 
"legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - 
"expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - 
"showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": 
"min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (RSS)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"ingester\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Rules", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", 
- "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ruler", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - 
"unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - 
"showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - 
{ - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Store-gateway", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - 
"unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (RSS)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - 
}, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"store-gateway\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 26, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"store-gateway\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk reads", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { 
- "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 27, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(store-gateway).*\"\n }\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": 
false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Reads resources", - "uid": "cc86fd5aa9301c6528986572ad974db9", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-reads-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-reads.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "175px", - "panels": [ - { - "content": "

\n This dashboard shows health metrics for the read path.\n It is broken into sections for each service on the read path, and organized by the order in which the read request flows.\n
\n Incoming queries travel from the gateway → query frontend → query scheduler → querier → ingester and/or store-gateway (depending on the time range of the query).\n
\n For each service, there are 3 panels showing (1) requests per second to that service, (2) average, median, and p99 latency of requests to that service, and (3) p99 latency of requests to each instance of that service.\n

\n

\n The dashboard also shows metrics for the 4 optional caches that can be deployed:\n the query results cache, the metadata cache, the chunks cache, and the index cache.\n
\n These panels will show “no data” if the caches are not deployed.\n

\n

\n Lastly, it also includes metrics for how the ingester and store-gateway interact with object storage.\n

\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 12, - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Reads dashboard description", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "100px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Instant queries per second\nRate of instant queries per second being made to the system.\nIncludes both queries made to the /prometheus API as\nwell as queries from the ruler.\n\n", - "fill": 1, - "format": "reqps", - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(\n rate(\n cortex_request_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",\n route=~\"(prometheus|api_prom)_api_v1_query\"\n }[$__rate_interval]\n )\n or\n rate(\n cortex_prometheus_rule_evaluations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Instant queries / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - 
"label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Range queries per second\nRate of range queries per second being made to\nMimir via the /prometheus API.\n\n", - "fill": 1, - "format": "reqps", - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_query_range\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Range queries / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### \"Label names\" queries per second\nRate of \"label names\" endpoint queries per second being made to\nMimir 
via the /prometheus API.\n\n", - "fill": 1, - "format": "reqps", - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_labels\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Label names queries / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### \"Label values\" queries per second\nRate of specific \"label values\" endpoint queries per second being made to\nMimir via the /prometheus API.\n\n", - "fill": 1, - "format": "reqps", - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - 
"seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_label_name_values\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Label values queries / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Series queries per second\nRate of series queries per second being made to\nMimir via the /prometheus API.\n\n", - "fill": 1, - "format": "reqps", - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_series\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": 
"70,80", - "timeFrom": null, - "timeShift": null, - "title": "Series queries / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - 
"matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { 
- "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Requests / sec\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 10, - "links": [ ], - "options": { - 
"legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Latency (Time in Queue)\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (Time in Queue)", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Queue length\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "queries" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "sum(min_over_time(cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__interval]))", - "format": "time_series", - "legendFormat": "Queue length", - "legendLink": null - } - ], - "title": "Queue length", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### 99th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "99th Percentile: {{ additional_queue_dimensions }}", - "refId": "A" - } - ], - "title": "99th Percentile Latency by Queue Dimension", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### 50th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "50th Percentile: {{ additional_queue_dimensions }}", - "refId": "A" - } - ], - "title": "50th Percentile Latency by Queue Dimension", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Average Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (additional_queue_dimensions) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (additional_queue_dimensions), \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "Average: {{ additional_queue_dimensions }}", - "refId": "C" - } - ], - "title": "Average Latency by Queue Dimension", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler Latency (Time in Queue) Breakout by Additional Queue Dimensions", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - 
"tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum (\n rate(thanos_memcached_operations_total{name=\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{name=\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "Requests/s", - "legendLink": null - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Cache – query results", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": 
"absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_querier_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / 
sum(cluster_job_route:cortex_querier_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_querier_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 
* sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - }, - { - 
"collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": 
"fixed" - } - } - ] - } - ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",route=~\"/gatewaypb.StoreGateway/.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - 
"refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 26, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Store-gateway", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - 
"stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 27, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n 
label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 28, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval])) * 1e3 / 
sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Store-gateway – key-value store for store-gateways ring", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 29, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(operation) (\n # Backwards compatibility\n rate(\n thanos_memcached_operations_total{\n component=\"store-gateway\",\n name=\"index-cache\",\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n or ignoring(backend)\n rate(\n thanos_cache_operations_total{\n component=\"store-gateway\",\n name=\"index-cache\",\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": 
"Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 30, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - 
"intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (getmulti)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Hit ratio\nEven if you do not set up memcached for the blocks index cache, you will still see data in this panel because the store-gateway by default has an\nin-memory blocks index cache.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 31, - 
"links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(item_type) (\n rate(\n thanos_store_index_cache_hits_total{\n component=\"store-gateway\",\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n/\nsum by(item_type) (\n rate(\n thanos_store_index_cache_requests_total{\n component=\"store-gateway\",\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n", - "format": "time_series", - "legendFormat": "{{item_type}}", - "legendLink": null - } - ], - "title": "Hit ratio", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memcached – block index cache (store-gateway accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 32, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(operation) (\n # Backwards compatibility\n rate(thanos_memcached_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{\n cluster=~\"$cluster\", 
job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 33, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n 
cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (getmulti)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - 
}, - "overrides": [ ] - }, - "id": 34, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_cache_memcached_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n)\n/\nsum(\n # Backwards compatibility\n rate(thanos_cache_memcached_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "items", - "legendLink": null - } - ], - "title": "Hit ratio", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memcached – chunks cache (store-gateway accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 35, - "links": [ ], - "options": { - "legend": { - "showLegend": 
true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(operation) (\n # Backwards compatibility\n rate(thanos_memcached_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 36, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - 
"intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n)\n", - "format": "time_series", - 
"intervalFactor": 2, - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (getmulti)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 37, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_cache_memcached_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n/\nsum(\n # Backwards compatibility\n rate(thanos_cache_memcached_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "items", - "legendLink": null - } - ], - "title": "Hit ratio", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memcached – metadata cache (store-gateway accesses)", - "titleSize": "h6" - }, - { - 
"collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "id": 38, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(operation) (\n # Backwards compatibility\n rate(thanos_memcached_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 39, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", 
job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n 
component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (getmulti)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 40, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(\n # Backwards compatibility\n rate(thanos_cache_memcached_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n/\nsum(\n # Backwards compatibility\n rate(thanos_cache_memcached_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", - "format": "time_series", - 
"legendFormat": "items", - "legendLink": null - } - ], - "title": "Hit ratio", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memcached – metadata cache (querier accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 41, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 42, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - 
], - "title": "Error rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 43, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - 
"label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 44, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": 
"short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Blocks object store (store-gateway accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 45, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - 
"legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Get", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 46, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval]))", - "format": 
"time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: GetRange", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 47, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval]))", - 
"format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Upload", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 48, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 49, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 50, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"querier\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Error rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 51, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 52, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Blocks object store (querier accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 53, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Get", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 54, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: GetRange", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 55, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Upload", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 56, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - 
"regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Reads", - "uid": "e327503188913dc38ad571c647eef643", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-reads.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-remote-ruler-reads-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": 
"dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": 
"custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"ruler-query-frontend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend (dedicated to ruler)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"} / 
container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler (dedicated to ruler)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": 
"request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": 
"desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier (dedicated to ruler)", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - 
}, - "timezone": "utc", - "title": "Mimir / Remote ruler reads resources", - "uid": "1940f6ef765a506a171faa2056c956c3", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-remote-ruler-reads-resources.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-remote-ruler-reads.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "175px", - "panels": [ - { - "content": "

\n This dashboard shows health metrics for the ruler read path when remote operational mode is enabled.\n It is broken into sections for each service on the ruler read path, and organized by the order in which the read request flows.\n
\n For each service, there are three panels showing (1) requests per second to that service, (2) average, median, and p99 latency of requests to that service, and (3) p99 latency of requests to each instance of that service.\n

\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 12, - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Remote ruler reads dashboard description", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "100px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Evaluations per second\nRate of rule expressions evaluated per second.\n\n", - "fill": 1, - "format": "reqps", - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(\n rate(\n cortex_request_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\",\n route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"\n }[$__rate_interval]\n )\n)\n", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Evaluations / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": 
false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": 
"color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th 
percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-frontend (dedicated to ruler)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Requests / sec\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 6, - "links": [ ], - "options": { - 
"legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Latency (Time in Queue)\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency (Time in Queue)", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Queue length\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "queries" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "sum(min_over_time(cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__interval]))", - "format": "time_series", - "legendFormat": "Queue length", - "legendLink": null - } - ], - "title": "Queue length", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler (dedicated to ruler)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### 99th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "99th Percentile: {{ additional_queue_dimensions }}", - "refId": "A" - } - ], - "title": "99th Percentile Latency by Queue Dimension", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### 50th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "50th Percentile: {{ additional_queue_dimensions }}", - "refId": "A" - } - ], - "title": "50th Percentile Latency by Queue Dimension", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Average Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "label_replace(sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (additional_queue_dimensions) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (additional_queue_dimensions), \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", - "format": "time_series", - "legendFormat": "Average: {{ additional_queue_dimensions }}", - "refId": "C" - } - ], - "title": "Average Latency by Queue Dimension", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Query-scheduler Latency (Time in Queue) Breakout by Additional Queue Dimensions", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - 
"value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", 
\"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_querier_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / sum(cluster_job_route:cortex_querier_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - 
"fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_querier_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Querier (dedicated to ruler)", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - 
"datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Remote ruler reads", - "uid": "f103238f7f5ab2f1345ce650cbfbfe2f", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-remote-ruler-reads.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-rollout-progress.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "fillOpacity": 80, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineWidth": 1, - "scaleDistribution": { - "type": "linear" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": 
[ ], - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Ready" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Updated" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "blue", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 13, - "w": 10, - "x": 0, - "y": 0 - }, - "id": 1, - "links": [ ], - "options": { - "barRadius": 0, - "barWidth": 0.96999999999999997, - "fullHighlight": false, - "groupWidth": 0.69999999999999996, - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "orientation": "horizontal", - "showValue": "auto", - "stacking": "none", - "tooltip": { - "mode": "multi", - "sort": "none" - }, - "xField": "Workload", - "xTickLabelRotation": 0, - "xTickLabelSpacing": 0 - }, - "targets": [ - { - "expr": "(\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas_updated{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas_updated{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n /\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n) and (\n sum by (workload) (\n label_replace(label_replace(label_replace(\n 
kube_deployment_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n > 0\n)\n", - "format": "table", - "instant": true, - "intervalFactor": null, - "legendFormat": "__auto", - "legendLink": null, - "step": null - }, - { - "expr": "(\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas_ready{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas_ready{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n /\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n) and (\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n > 0\n)\n", - "format": "table", - "instant": true, - "intervalFactor": null, - "legendFormat": "__auto", - "legendLink": null, - "step": null - } - ], - "title": "Rollout progress", - "transformations": [ - { - "id": "joinByField", 
- "options": { - "byField": "workload", - "mode": "outer" - } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time 1": true, - "Time 2": true - }, - "renameByName": { - "Value #A": "Updated", - "Value #B": "Ready", - "workload": "Workload" - } - } - }, - { - "id": "sortBy", - "options": { - "sort": [ - { - "field": "Workload" - } - ] - } - } - ], - "type": "barchart" - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 10, - "y": 0 - }, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Writes - 2xx", - "tooltip": { - 
"shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 0.20000000000000001 - }, - { - "color": "red", - "value": 0.5 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 12, - "y": 0 - }, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - 
"step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Writes - 4xx", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 0.01 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 14, - "y": 0 - }, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - 
"intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Writes - 5xx", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 0.20000000000000001 - }, - { - "color": "red", - "value": 0.5 - } - ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 8, - "x": 16, - "y": 0 - }, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - 
"step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Writes 99th latency", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 10, - "y": 4 - }, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": 
"", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Reads - 2xx", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 0.01 - }, - { - "color": "red", - "value": 0.050000000000000003 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 12, - "y": 4 - }, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", 
route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Reads - 4xx", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 0.01 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 2, - "x": 14, - "y": 4 - }, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Reads - 5xx", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "noValue": "", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 1 - }, - { - "color": "red", - "value": 2.5 - } - ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 4, - "w": 8, - "x": 16, - "y": 4 - }, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", 
route=~\"(prometheus|api_prom)_api_v1_.+\"}))\n", - "format": null, - "instant": false, - "interval": "", - "intervalFactor": null, - "legendFormat": "", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Reads 99th latency", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 0, - "noValue": "All healthy", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 1 - }, - { - "color": "red", - "value": 2 - } - ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "fill": 1, - "gridPos": { - "h": 3, - "w": 10, - "x": 0, - "y": 13 - }, - "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "text": { - "titleSize": 14, - "valueSize": 14 - }, - "textMode": "value_and_name" - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "kube_deployment_status_replicas_unavailable{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n> 0\n", - "format": null, - "instant": true, - "interval": "", - 
"intervalFactor": null, - "legendFormat": "{{deployment}}", - "legendLink": null, - "step": null - }, - { - "expr": "kube_statefulset_status_replicas_current{cluster=~\"$cluster\", namespace=~\"$namespace\"} -\nkube_statefulset_status_replicas_ready {cluster=~\"$cluster\", namespace=~\"$namespace\"}\n> 0\n", - "format": null, - "instant": true, - "interval": "", - "intervalFactor": null, - "legendFormat": "{{statefulset}}", - "legendLink": null, - "step": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Unhealthy pods", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "stat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "r.*" - }, - "properties": [ - { - "id": "custom.align", - "value": "center" - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 6, - "x": 10, - "y": 8 - }, - "id": 11, - "targets": [ - { - "expr": "count by(container, version) (\n label_replace(\n kube_pod_container_info{cluster=~\"$cluster\", namespace=~\"$namespace\"},\n \"version\", \"$1\", \"image\", \".*:(.*)\"\n )\n)\n", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "title": "Pods count per version", - "transformations": [ - { - "id": "labelsToFields", - "options": { - "valueLabel": "version" - } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true - }, - "indexByName": { - "Time": 0, - "container": 1 - } - } - }, - { - "id": "sortBy", - "options": { - "fields": { }, - "sort": [ - { - "field": "container" - } - ] - } - } - ], - "type": "table" - }, - { - 
"datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 10 - }, - "unit": "percentunit" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 8 - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}))[1h:])\n)\n", - "format": "time_series", - "legendFormat": "writes", - "legendLink": null - }, - { - "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}))[1h:])\n)\n", - "format": "time_series", - "legendFormat": "reads", - "legendLink": null - } - ], - "title": "Latency vs 24h ago", - "type": "timeseries" - } - ], - "refresh": "10s", - "rows": null, - "schemaVersion": 27, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - 
"options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Rollout progress", - "uid": "7f0b5567d543a1698e695b530eb7f5de", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-rollout-progress.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-ruler.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ 
- "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "100px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cortex_ruler_managers_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Active configurations", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - 
"seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Total rules", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Read from ingesters - QPS\nNote: Even while operating in Remote ruler mode you will still see values for this panel.\n\nThis is because the metrics are inclusive of intermediate services and are showing the requests that ultimately reach the ingesters.\n\nFor a more detailed view of the read path when using remote ruler mode, see the Remote ruler reads dashboard.\n\n", - "fill": 1, - "format": "reqps", - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Read from ingesters - QPS", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "reqps", - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval]))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Write to ingesters - QPS", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - 
"label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "success", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - }, - 
{ - "expr": "sum(rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "missed", - "legendLink": null - } - ], - "title": "Evaluations per second", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum (rate(cortex_prometheus_rule_evaluation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum (rate(cortex_prometheus_rule_evaluation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "average", - "legendLink": null - } - ], - "title": "Latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Rule evaluations global", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - 
"matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", 
operation=\"/cortex.Ingester/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "QPS", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", 
operation=\"/cortex.Ingester/Push\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Writes (ingesters)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { 
- "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "QPS", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, 
sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Reads (ingesters)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - 
"mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n 
label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ruler - key-value store for rulers ring", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(cortex_querier_storegateway_instances_hit_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_instances_hit_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Number of store-gateways hit per query", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": 
"sum(rate(cortex_querier_storegateway_refetches_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_refetches_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Refetches of missing blocks per query", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "description": "### Consistency checks failed\nRate of queries that had to run with consistency checks and those checks failed. A failed consistency check means that some of at least one block which had to be queried wasn't present in any of the store-gateways.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Failures / sec" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(rate(cortex_querier_blocks_consistency_checks_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) / 
sum(rate(cortex_querier_blocks_consistency_checks_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Failures / sec", - "legendLink": null - } - ], - "title": "Consistency checks failed", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ruler - blocks storage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(user) (rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum by(user) (rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]) > 0)\n> 0\n", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Delivery errors", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": 
"percentunit" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(user) (rate(cortex_prometheus_notifications_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum by(user) (rate(cortex_prometheus_notifications_queue_capacity{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0\n", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Queue length", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (user) (increase(cortex_prometheus_notifications_dropped_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0\n", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Dropped", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Notifications", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - 
"pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(user) (rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Missed iterations", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "rate(cortex_prometheus_rule_group_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])\n /\nrate(cortex_prometheus_rule_group_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])\n", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": 
false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(rule_group) (rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "{{ rule_group }}", - "legendLink": null - } - ], - "title": "Failures", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Group evaluations", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "sum by(user) (rate(cortex_prometheus_rule_evaluation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum by(user) (rate(cortex_prometheus_rule_evaluation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", - "format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Latency", - "type": "timeseries" - } - ], - 
"repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Rule evaluation per user", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Operations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "max": 1, - "min": 0, - "noValue": "0", - "unit": "percentunit" - } - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\"}[$__rate_interval])) >= 0", - "format": "time_series", - "legendFormat": "{{operation}}", - "legendLink": null - } - ], - "title": "Error rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - 
"custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Attributes", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - 
"fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 26, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Exists", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - 
"repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ruler configuration object store (ruler accesses)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 27, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Get", - "type": "timeseries", - "yaxes": [ - { - 
"format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 28, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: GetRange", - "type": 
"timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 29, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: Upload", - 
"type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 30, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency of op: 
Delete", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Ruler", - 
"uid": "631e15d5d85afb2ca8e35d62984eeaa0", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-ruler.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-scaling.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "200px", - "panels": [ - { - "id": 1, - "options": { - "content": "This dashboard identifies scaling-related issues by suggesting services that you might want to scale up.\nThe table that follows contains a suggested number of replicas and the reason why.\nIf the system is failing and depending on the reason, try scaling up to the specified number.\nThe specified numbers are intended as helpful guidelines when things go wrong, rather than prescriptive guidelines.\n\nReasons:\n- **sample_rate**: There are not enough replicas to handle the\n sample rate. Applies to distributor and ingesters.\n- **active_series**: There are not enough replicas\n to handle the number of active series. Applies to ingesters.\n- **cpu_usage**: There are not enough replicas\n based on the CPU usage of the jobs vs the resource requests.\n Applies to all jobs.\n- **memory_usage**: There are not enough replicas based on the memory\n usage vs the resource requests. 
Applies to all jobs.\n- **active_series_limits**: There are not enough replicas to hold 60% of the\n sum of all the per tenant series limits.\n- **sample_rate_limits**: There are not enough replicas to handle 60% of the\n sum of all the per tenant rate limits.\n", - "mode": "markdown" - }, - "span": 12, - "title": "", - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Service scaling", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "400px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 0, - "desc": false - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "Required Replicas", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "Cluster", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "__name__", - "thresholds": [ ], - "type": "hidden", - "unit": "short" - }, - { - "alias": "Cluster", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - 
"link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "cluster", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "Service", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "deployment", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "Namespace", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "namespace", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "Reason", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "reason", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "sort_desc(\n cluster_namespace_deployment_reason:required_replicas:count{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n > ignoring(reason) group_left\n cluster_namespace_deployment:actual_replicas:count{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\n", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Workload-based scaling", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - 
"yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Scaling", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Scaling", - "uid": 
"64bbad83507b7289b514725658e10352", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-scaling.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - mimir-slow-queries.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | 
user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Response time", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Fetched series", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - 
"unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Fetched chunks", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": 
"quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Response size", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Time span", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "description": "### Query wall time\nSeconds per second spent by queriers evaluating queries.\nThis is roughly the product of the number of 
subqueries for a query and how long they took.\nIn increase in this metric means that queries take more resources from the query path to evaluate.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap query_wall_time_seconds [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p99", - "legendLink": null - }, - { - "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap query_wall_time_seconds [$__auto]) by ()", - "format": "time_series", - "legendFormat": "p50", - "legendLink": null - } - ], - "title": "Query wall time", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Accross tenants", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, 
- "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 response time", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 fetched series", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, 
- "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 fetched chunks", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 response size", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - 
"pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 time span", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "description": "### Query wall time\nSeconds per second spent by queriers evaluating queries.\nThis is roughly the product of the number of subqueries for a query and how long they took.\nIn increase in this metric means that queries take more resources from the query path to evaluate.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | 
unwrap query_wall_time_seconds [$__auto]) by (user))", - "format": "time_series", - "legendFormat": "{{user}}", - "legendLink": null - } - ], - "title": "P99 query wall time", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Top 10 tenants", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 response time", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, 
- "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 fetched series", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 fetched chunks", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { 
- "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 response size", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 time span", - "type": "timeseries" - }, - { - "datasource": "${loki_datasource}", - "description": "### Query wall time\nSeconds per second spent by queriers evaluating queries.\nThis is roughly the product of the number of subqueries for a query and how long they took.\nIn increase in this metric means that queries take more resources from the query path to evaluate.\n\n", - "fieldConfig": { - 
"defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 2, - "targets": [ - { - "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap query_wall_time_seconds [$__auto]) by (user_agent))", - "format": "time_series", - "legendFormat": "{{user_agent}}", - "legendLink": null - } - ], - "title": "P99 query wall time", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Top 10 User-Agents", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "${loki_datasource}", - "fieldConfig": { - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "fetched_chunk_bytes" - }, - "properties": [ - { - "id": "unit", - "value": "bytes" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "fetched_index_bytes" - }, - "properties": [ - { - "id": "unit", - "value": "bytes" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "response_size_bytes" - }, - "properties": [ - { - "id": "unit", - "value": "bytes" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "results_cache_hit_bytes" - }, - "properties": [ - { - "id": "unit", - "value": "bytes" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "results_cache_miss_bytes" - }, - "properties": [ - { - "id": "unit", - "value": 
"bytes" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "estimated_series_count" - }, - "properties": [ - { - "id": "unit", - "value": "short" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "fetched_chunks_count" - }, - "properties": [ - { - "id": "unit", - "value": "short" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "fetched_series_count" - }, - "properties": [ - { - "id": "unit", - "value": "short" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Time span" - }, - "properties": [ - { - "id": "unit", - "value": "s" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Duration" - }, - "properties": [ - { - "id": "unit", - "value": "s" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Step" - }, - "properties": [ - { - "id": "unit", - "value": "s" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "queue_time_seconds" - }, - "properties": [ - { - "id": "unit", - "value": "s" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "query_wall_time_seconds" - }, - "properties": [ - { - "id": "unit", - "value": "s" - } - ] - } - ] - }, - "height": "500px", - "id": 19, - "span": 12, - "targets": [ - { - "expr": "{cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | label_format response_time_seconds=\"{{ if .response_time }} {{ duration .response_time }} {{ end }}\",param_step_seconds=\"{{ if .param_step }} {{ div .param_step 1000 }} {{ end }}\",length_seconds=\"{{ if .length }} {{ duration .length }} {{ end }}\"", - "instant": false, - "legendFormat": "", - "range": true, - "refId": "A" - } - ], - "title": "Slow queries", - "transformations": [ - { - "id": "extractFields", - "options": { - "source": "labels" - } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Line": true, - "Time": 
true, - "caller": true, - "cluster": true, - "component": true, - "container": true, - "gossip_ring_member": true, - "host": true, - "id": true, - "job": true, - "labels": true, - "length": true, - "level": true, - "line": true, - "method": true, - "msg": true, - "name": true, - "namespace": true, - "param_step": true, - "path": true, - "pod": true, - "pod_template_hash": true, - "response_time": true, - "stream": true, - "traceID": true, - "tsNs": true - }, - "indexByName": { - "err": 10, - "length_seconds": 3, - "param_end": 5, - "param_query": 8, - "param_start": 4, - "param_step_seconds": 7, - "param_time": 6, - "response_time_seconds": 9, - "status": 1, - "ts": 0, - "user": 2 - }, - "renameByName": { - "err": "Error", - "length_seconds": "Time span", - "param_end": "End", - "param_query": "Query", - "param_start": "Start", - "param_step_seconds": "Step", - "param_time": "Time (instant query)", - "response_time_seconds": "Duration", - "ts": "Completion date", - "user": "Tenant ID" - } - } - }, - { - "id": "convertFieldType", - "options": { - "conversions": [ - { - "destinationType": "number", - "targetField": "sharded_queries" - }, - { - "destinationType": "number", - "targetField": "split_queries" - }, - { - "destinationType": "number", - "targetField": "fetched_chunk_bytes" - }, - { - "destinationType": "number", - "targetField": "fetched_index_bytes" - }, - { - "destinationType": "number", - "targetField": "response_size_bytes" - }, - { - "destinationType": "number", - "targetField": "results_cache_hit_bytes" - }, - { - "destinationType": "number", - "targetField": "results_cache_miss_bytes" - }, - { - "destinationType": "number", - "targetField": "estimated_series_count" - }, - { - "destinationType": "number", - "targetField": "fetched_chunks_count" - }, - { - "destinationType": "number", - "targetField": "fetched_series_count" - }, - { - "destinationType": "number", - "targetField": "Time span" - }, - { - "destinationType": "number", - "targetField": 
"Duration" - }, - { - "destinationType": "number", - "targetField": "Step" - }, - { - "destinationType": "number", - "targetField": "queue_time_seconds" - }, - { - "destinationType": "number", - "targetField": "query_wall_time_seconds" - } - ] - } - } - ], - "type": "table" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "hide": 0, - "includeAll": false, - "label": "Loki data source", - "multi": false, - "name": "loki_datasource", - "query": "loki", - "type": "datasource" - }, - { - "current": { - "selected": true, - "text": "5s", - "value": "5s" - }, - "hide": 0, - "label": "Min duration", - "name": "min_duration", - "options": [ - { - "selected": true, - "text": "5s", - "value": 
"5s" - } - ], - "query": "5s", - "type": "textbox" - }, - { - "current": { - "selected": true, - "text": ".*", - "value": ".*" - }, - "hide": 0, - "label": "Tenant ID", - "name": "tenant_id", - "options": [ - { - "selected": true, - "text": ".*", - "value": ".*" - } - ], - "query": ".*", - "type": "textbox" - }, - { - "current": { - "selected": true, - "text": ".*", - "value": ".*" - }, - "hide": 0, - "label": "User-Agent HTTP Header", - "name": "user_agent", - "options": [ - { - "selected": true, - "text": ".*", - "value": ".*" - } - ], - "query": ".*", - "type": "textbox" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Slow queries", - "uid": "6089e1ce1e678788f46312a0a1e647e6", - "version": 0 - } -kind: ConfigMap + memcached-address: bWVtY2FjaGVkLm1lbWNhY2hlZC1zeXN0ZW0uc3ZjLmNsdXN0ZXIubG9jYWw6MTEyMTE= +kind: Secret metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-slow-queries.json + name: integrations-memcached namespace: monitoring-system +type: Opaque --- apiVersion: v1 data: - mimir-tenants.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "25px", - "panels": [ - { - "content": "

\n This dashboard shows various metrics detailed by tenant (user) selected above.\n

\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 12, - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Tenants dashboard description", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### All series\nNumber of active, in-memory, and owned series per user, and active series matching custom trackers (in parenthesis).\nNote that these counts include all series regardless of the type of data (counter, gauge, native histogram, etc.).\nNote that active series matching custom trackers are included in the total active series count.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum(\n (\n cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n - cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "in-memory", - "legendLink": null - }, - { - "expr": "max(cortex_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"max_global_series_per_user\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"max_global_series_per_user\"})\n", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "sum(\n cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "active", - "legendLink": null - }, - { - "expr": "sum(\n cortex_ingester_owned_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "owned", - "legendLink": null - }, - { - "expr": "sum by (name) (\n cortex_ingester_active_series_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n) > 0\n", - "format": "time_series", - "legendFormat": "active ({{ name }})", - "legendLink": null - } - ], - "title": "All series", - 
"type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### In-memory series per ingester\nLocal tenant series limit and number of in-memory series per ingester.\nBecause series can be unevenly distributed across ingesters, ingesters may hit the local limit at different times.\nNote that in-memory series may exceed the local limit if limiting based on owned series is enabled.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/local limit .+/" - }, - "properties": [ - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - }, - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "min by (job) (cortex_ingester_local_limits{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", limit=\"max_global_series_per_user\", user=\"$user\"})\n", - "format": "time_series", - "legendFormat": "local limit ({{job}})", - "legendLink": null - }, - { - "expr": "cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n- cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "In-memory series per ingester", - "type": "timeseries" - }, - { - "datasource": 
"$datasource", - "description": "### Owned series per ingester\nLocal tenant series limit and number of owned series per ingester.\nBecause series can be unevenly distributed across ingesters, ingesters may hit the local limit at different times.\nOwned series are the subset of an ingester's in-memory series that currently map to it in the ring\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/local limit .+/" - }, - "properties": [ - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - }, - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "min by (job) (cortex_ingester_local_limits{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", limit=\"max_global_series_per_user\", user=\"$user\"})\n", - "format": "time_series", - "legendFormat": "local limit ({{job}})", - "legendLink": null - }, - { - "expr": "cortex_ingester_owned_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Owned series per ingester", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Tenant series counts", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - 
"description": "### Series with exemplars\nNumber of series with exemplars currently in storage.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "series", - "legendLink": null - } - ], - "title": "Series with exemplars", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Oldest exemplar age\nThe age of the oldest exemplar stored in circular storage.\nUseful to check for what time range the current exemplar buffer limit allows.\nThis usually means the max age for all exemplars for a typical setup.\nThis is not true though if one of the series timestamp is in future compared to rest series.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - 
"tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "time() - min(cortex_ingester_tsdb_exemplar_last_exemplars_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"} > 0)", - "format": "time_series", - "legendFormat": "age", - "legendLink": null - } - ], - "title": "Oldest exemplar age", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Native histogram series\nNumber of active native histogram series per user, and active native histogram series matching custom trackers (in parenthesis).\nNote that active series matching custom trackers are included in the total active series count.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n cortex_ingester_active_native_histogram_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "active", - "legendLink": null - }, - { - "expr": "sum by (name) (\n 
cortex_ingester_active_native_histogram_series_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n) > 0\n", - "format": "time_series", - "legendFormat": "active ({{ name }})", - "legendLink": null - } - ], - "title": "Native histogram series", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Total number of buckets used by native histogram series\nTotal number of buckets in active native histogram series per user, and total active native histogram buckets matching custom trackers (in parenthesis).\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n cortex_ingester_active_native_histogram_buckets{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "buckets", - 
"legendLink": null - }, - { - "expr": "sum by (name) (\n cortex_ingester_active_native_histogram_buckets_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n) > 0\n", - "format": "time_series", - "legendFormat": "buckets ({{ name }})", - "legendLink": null - } - ], - "title": "Total number of buckets used by native histogram series", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Exemplars and native histograms", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Distributor requests incoming rate\nThe rate of requests that have come in to the distributor, including rejected requests.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_requests_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Distributor requests incoming rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor requests 
received (accepted) rate\nThe rate of received requests, excluding rejected requests.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_received_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - }, - { - "expr": "max(cortex_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"request_rate\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"request_rate\"})\n", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "Distributor requests received (accepted) rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Newest seen sample age\nThe age of the newest received sample seen in the distributors.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - 
"thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "time() - max(cortex_distributor_latest_seen_sample_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"} > 0)", - "format": "time_series", - "legendFormat": "age", - "legendLink": null - } - ], - "title": "Newest seen sample age", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor discarded requests rate\nThe rate of each request's discarding reason.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_discarded_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{ reason }}", - "legendLink": null - } - ], - "title": "Distributor discarded requests rate", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor ingestion requests", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Distributor samples incoming rate\nThe rate of samples that 
have come in to the distributor, including rejected or deduped exemplars.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_samples_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Distributor samples incoming rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor samples received (accepted) rate\nThe rate of received samples, excluding rejected and deduped samples.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - }, - { - "expr": "max(cortex_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"ingestion_rate\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"ingestion_rate\"})\n", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "Distributor samples received (accepted) rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor deduplicated/non-HA\nThe rate of deduplicated samples and the rate of received samples for a user that has HA tracking turned on, but the sample didn't contain both HA labels.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_deduped_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "deduplicated", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_distributor_non_ha_samples_received_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "non-HA", - 
"legendLink": null - } - ], - "title": "Distributor deduplicated/non-HA", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor and ingester discarded samples rate\nThe rate of each sample's discarding reason.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{ reason }} (distributor)", - "legendLink": null - }, - { - "expr": "sum by (reason) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{ reason }} (ingester)", - "legendLink": null - } - ], - "title": "Distributor and ingester discarded samples rate", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Samples ingestion funnel", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Distributor exemplars incoming rate\nThe rate of exemplars that have come in to the distributor, including rejected or deduped exemplars.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - 
"pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_exemplars_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Distributor exemplars incoming rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor exemplars received (accepted) rate\nThe rate of received exemplars, excluding rejected and deduped exemplars.\nThis number can be sensibly lower than incoming rate because we dedupe the HA sent exemplars, and then reject based on time.\nSee discarded rate for reasons why exemplars are being discarded.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 18, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_distributor_received_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Distributor exemplars received 
(accepted) rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor discarded exemplars rate\nThe rate of each exmplars' discarding reason.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_discarded_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{ reason }}", - "legendLink": null - } - ], - "title": "Distributor discarded exemplars rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Ingester appended exemplars rate\nTotal number of exemplars appended in the ingesters.\nThis can be lower than ingested exemplars rate since TSDB does not append the same exemplar twice, and those can be frequent.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n rate(cortex_ingester_tsdb_exemplar_exemplars_appended_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval])\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Ingester appended exemplars rate", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Exemplars ingestion funnel", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Symbol table size for loaded blocks\nSize of symbol table in memory for loaded blocks, averaged by ingester.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (job) (cortex_ingester_tsdb_symbol_table_size_bytes{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "{{ job }}", - "legendLink": null - } - ], - "title": "Symbol table size for loaded blocks", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Space used by local blocks\nThe number of bytes that are currently used for local storage by all blocks.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 
1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (job) (cortex_ingester_tsdb_storage_blocks_bytes{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "{{ job }}", - "legendLink": null - } - ], - "title": "Space used by local blocks", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingesters' storage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Number of groups\nTotal number of rule groups for a tenant.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "count(sum by (rule_group) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", 
user=\"$user\"}))", - "format": "time_series", - "legendFormat": "groups", - "legendLink": null - }, - { - "expr": "max(cortex_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"ruler_max_rule_groups_per_tenant\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"ruler_max_rule_groups_per_tenant\"})\n", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "Number of groups", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Number of rules\nTotal number of rules for a tenant.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "rules", - "legendLink": null - } - ], - "title": "Number of rules", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "options": { - "legend": { - 
"showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Total evaluations rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 26, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by (rule_group) (rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) > 0", - "format": "time_series", - "legendFormat": "{{ rule_group }}", - "legendLink": null - } - ], - "title": "Failed evaluations rate", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Rules", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 27, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - 
"points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "rules", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (rule_group) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit biggest groups", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 28, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - 
"nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "seconds", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (rule_group) (cortex_prometheus_rule_group_last_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit slowest groups (last evaluation)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Top rules", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": 
"$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 29, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Sent notifications rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "rate" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 30, - "links": [ ], - "options": { - "legend": { - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "rate", - "legendLink": null - } - ], - "title": "Failed notifications rate", - "type": "timeseries" - } - ], - "repeat": null, - 
"repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Notifications", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 31, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (user) (cortex_alertmanager_alerts{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "alerts", - "legendLink": null - }, - { - "expr": "sum by (user) (cortex_alertmanager_silences{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "silences", - "legendLink": null - } - ], - "title": "Alerts", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": 
"#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 32, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "(\nsum(rate(cortex_alertmanager_notifications_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))\n-\non() (sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) or on () vector(0))\n) > 0\n", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "NPS", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 33, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "(\nsum(rate(cortex_alertmanager_notifications_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration)\n-\n(sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", 
user=\"$user\"}[$__rate_interval])) by(integration) or\n (sum(rate(cortex_alertmanager_notifications_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration) * 0)\n)) > 0\n", - "format": "time_series", - "legendFormat": "success - {{ integration }}", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration)", - "format": "time_series", - "legendFormat": "failed - {{ integration }}", - "legendLink": null - } - ], - "title": "NPS by integration", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Alertmanager", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 34, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_query_frontend_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Queries / Sec", - "legendLink": null - } - ], - "title": "Rate of Read Requests - query-frontend", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - 
"fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 35, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "Queue Length", - "legendLink": null - } - ], - "title": "Number of Queries Queued - query-scheduler", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Read Path - Queries (User)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 36, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_query_frontend_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", user=\"$user\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Queries / Sec", - "legendLink": null - } - ], - "title": "Rate of Read Requests - ruler-query-frontend", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - 
"drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 37, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\", user=\"$user\"})", - "format": "time_series", - "legendFormat": "Queue Length", - "legendLink": null - } - ], - "title": "Number of Queries Queued - ruler-query-scheduler", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Read Path - Queries (Ruler)", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Estimated Compaction Jobs\nEstimated number of compaction jobs for selected user, based on latest version of bucket index. When user sends data, ingesters upload new user blocks every 2 hours\n(shortly after 01:00 UTC, 03:00 UTC, 05:00 UTC, etc.), and compactors should process all of the blocks within 2h interval.\nIf this graph regularly goes to zero (or close to zero) in 2 hour intervals, then compaction for this user works correctly.\n\nDepending on the configuration, there are two types of jobs: `split` jobs and `merge` jobs. 
Split jobs will only show up when user is configured with positive number of `compactor_split_and_merge_shards`.\nValues for split and merge jobs are stacked.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 50, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 38, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (type) (cortex_bucket_index_estimated_compaction_jobs{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", user=\"$user\"})\nand ignoring(type)\n(sum(rate(cortex_bucket_index_estimated_compaction_jobs_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) == 0)\n", - "format": "time_series", - "legendFormat": "{{ job }}", - "legendLink": null - } - ], - "title": "Estimated Compaction Jobs", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Number of blocks\nNumber of blocks stored in long-term storage for this user.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 39, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "max by (user) (cortex_bucket_blocks_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", user=\"$user\"})\n", - "format": "time_series", - "legendFormat": "{{ job }}", - "legendLink": null - } - ], - "title": "Blocks", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Compactions", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "user", - "multi": false, - "name": "user", - "options": [ ], - "query": "label_values(cortex_ingester_active_series{cluster=~\"$cluster\", namespace=~\"$namespace\"}, user)", - "refresh": 1, - "regex": "", - "sort": 
1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "current": { - "selected": true, - "text": "10", - "value": "10" - }, - "hide": 0, - "includeAll": false, - "multi": false, - "name": "limit", - "options": [ - { - "selected": true, - "text": "10", - "value": "10" - }, - { - "selected": false, - "text": "50", - "value": "50" - }, - { - "selected": false, - "text": "100", - "value": "100" - }, - { - "selected": false, - "text": "500", - "value": "500" - }, - { - "selected": false, - "text": "1000", - "value": "1000" - } - ], - "type": "custom" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Tenants", - "uid": "35fa247ce651ba189debf33d7ae41611", - "version": 0 - } -kind: ConfigMap + mysql-host: bXlzcWwubXlzcWwtc3lzdGVtLnN2Yy5jbHVzdGVyLmxvY2Fs + mysql-password: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= + mysql-username: bGd0bXA= +kind: Secret metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-tenants.json + name: integrations-mysql namespace: monitoring-system +type: Opaque --- apiVersion: v1 data: - mimir-top-tenants.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": 
"25px", - "panels": [ - { - "content": "

\n This dashboard shows the top tenants based on multiple selection criterias.\n Rows are collapsed by default to avoid querying all of them.\n Use the templating variable \"limit\" above to select the amount of users to be shown.\n

\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 12, - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Top tenants dashboard description", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "series", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit,\n sum by (user) (\n cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n )\n)\n", - 
"format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by active series", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By active series", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "series", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - 
"pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} )\n)\n)", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by in-memory series (series created - series removed)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By in-memory series", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - 
}, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} )\n)\n\nand\ntopk($limit, sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ end())\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ end())\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} @ end())\n)\n - sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ start())\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ start())\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} @ start())\n)\n)\n", - 
"format": "time_series", - "legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Top $limit users by in-memory series (series created - series removed) that grew the most between query range start and query range end", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By in-memory series growth", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "samples/s", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[5m])))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - 
"thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by received samples rate in last 5m", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By samples rate", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\nand\ntopk($limit,\n sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ end()))\n -\n sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ start()))\n)\n", - "format": "time_series", - "legendFormat": "{{ user }}", 
- "legendLink": null - } - ], - "title": "Top $limit users by received samples rate that grew the most between query range start and query range end", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By samples rate growth", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "samples/s", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[5m])))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - 
"title": "Top $limit users by discarded samples rate in last 5m", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By discarded samples rate", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 12, - "targets": [ - { - "expr": "sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\nand\ntopk($limit,\n sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ end()))\n -\n sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ start()))\n)\n", - "format": "time_series", - 
"legendFormat": "{{ user }}", - "legendLink": null - } - ], - "title": "Top $limit users by discarded samples rate that grew the most between query range start and query range end", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By discarded samples rate growth", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "series", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit,\n sum by (user) (\n cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n )\n)\n", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by series with exemplars", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By series with exemplars", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "exemplars/s", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": 
"", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (user) (rate(cortex_distributor_received_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[5m])))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by received exemplars rate in last 5m", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By exemplars rate", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 11, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 3, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - 
{ - "alias": "rules", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (rule_group, user) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit biggest groups", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By rule group size", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 12, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": 
"flot", - "seriesOverrides": [ ], - "sort": { - "col": 3, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "seconds", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (rule_group, user) (cortex_prometheus_rule_group_last_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit slowest groups (last evaluation)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By rule group evaluation time", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - 
"fill": 1, - "id": 13, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "Compaction Jobs", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit,\n sum by (user) (cortex_bucket_index_estimated_compaction_jobs{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"})\n and ignoring(user)\n (sum(rate(cortex_bucket_index_estimated_compaction_jobs_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) == 0)\n)\n", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Top $limit users by estimated compaction jobs from bucket-index", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - 
"format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "By estimated compaction jobs from bucket-index", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "current": { - "selected": true, - "text": "10", - "value": "10" - }, - "hide": 0, - "includeAll": false, - "multi": false, - "name": "limit", - "options": [ - { - "selected": true, - "text": "10", - "value": "10" - }, - { - "selected": false, - "text": "50", - "value": "50" - }, - { - "selected": false, - "text": "100", - "value": "100" - } - 
], - "type": "custom" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Top tenants", - "uid": "bc6e12d4fe540e4a1785b9d3ca0ffdd9", - "version": 0 - } -kind: ConfigMap + redis-addr: cmVkaXMtbWFzdGVyLnJlZGlzLXN5c3RlbS5zdmMuY2x1c3Rlci5sb2NhbDo2Mzc5 + redis-password: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= +kind: Secret metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-top-tenants.json + name: integrations-redis namespace: monitoring-system +type: Opaque --- apiVersion: v1 data: - mimir-writes-networking.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - 
"expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - 
"format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Summary", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - 
"datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - 
"spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Receive bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Transmit bandwidth", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"})", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"})", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - } - ], - "title": "Inflight requests (per pod)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}))", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - }, - { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}))", - "format": "time_series", - "legendFormat": "highest", - "legendLink": null - }, - { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "TCP connections (per pod)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } 
- ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Writes networking", - "uid": "978c1cb452585c96697a238eaac7fe2d", - "version": 0 - } -kind: ConfigMap + MIMIR_S3_SECRET_ACCESS_KEY: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= +kind: Secret metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir - labels: - grafana_dashboard: "1" - name: mimir-writes-networking.json + name: mimir-env-92ddctt858 namespace: monitoring-system +type: Opaque --- apiVersion: v1 -data: - mimir-writes-resources.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) 
(rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap 
inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Summary", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": 
"min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\",resource=\"cpu\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"distributor\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (cortex_ingester_memory_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", - "format": 
"time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "In-memory series", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 8, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"cpu\"})", - 
"format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "CPU", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (RSS)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "fill": "dash" - } - } - ] - } - ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", - "format": "time_series", - 
"legendFormat": "request", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", 
namespace=~\"$namespace\",\n container=~\"ingester\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"ingester\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk reads", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "custom": { - "fillOpacity": 0 - }, - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "id": 14, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - 
"sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(ingester).*\"\n }\n)\n", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk space utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": 
"now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Writes resources", - "uid": "bc9160e50b52e89e0e49c840fea3d379", - "version": 0 - } -kind: ConfigMap +kind: Service metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir labels: - grafana_dashboard: "1" - name: mimir-writes-resources.json + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent namespace: monitoring-system +spec: + internalTrafficPolicy: Cluster + ports: + - name: http-metrics + port: 80 + protocol: TCP + targetPort: 80 + - name: grpc-otlp + port: 4317 + protocol: TCP + targetPort: 4317 + - name: http-otlp + port: 4318 + protocol: TCP + targetPort: 4318 + - name: zipkin + port: 9411 + protocol: TCP + targetPort: 9411 + - name: jaeger-compact + port: 6831 + protocol: UDP + targetPort: 6831 + selector: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + type: ClusterIP --- apiVersion: v1 -data: - mimir-writes.json: |- - { - "__requires": [ - { - "id": "grafana", - "name": "Grafana", - "type": "grafana", - "version": "8.0.0" - } - ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 1, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "mimir" - ], - "targetBlank": false, - "title": "Mimir dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "125px", - "panels": [ - { - "content": "

\n This dashboard shows various health metrics for the write path.\n It is broken into sections for each service on the write path,\n and organized by the order in which the write request flows.\n
\n Incoming metrics data travels from the gateway → distributor → ingester.\n
\n For each service, there are 3 panels showing\n (1) requests per second to that service,\n (2) average, median, and p99 latency of requests to that service, and\n (3) p99 latency of requests to each instance of that service.\n

\n

\n It also includes metrics for the key-value (KV) stores used to manage\n the high-availability tracker and the ingesters.\n

\n", - "datasource": null, - "description": "", - "id": 1, - "mode": "markdown", - "span": 12, - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Writes dashboard description", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "100px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_samples:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Samples / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Exemplars / sec\nThe total number of received exemplars by the distributors, excluding rejected and deduped exemplars, 
but not necessarily ingested by the ingesters.\n\n", - "fill": 1, - "format": "short", - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Exemplars / sec", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### In-memory series\nThe number of series not yet flushed to object storage that are held in ingester memory.\n\n", - "fill": 1, - "format": "short", - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - 
"steppedLine": false, - "targets": [ - { - "expr": "sum(cortex_ingester_memory_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n/ on(cluster, namespace) group_left\nmax by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}))\n", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "In-memory series", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "### Exemplars in ingesters\nNumber of TSDB exemplars currently in ingesters' storage.\n\n", - "fill": 1, - "format": "short", - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(cortex_ingester_tsdb_exemplar_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n/ on(cluster, namespace) group_left\nmax by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}))\n", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Exemplars in ingesters", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "count(count by(user) (cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}))", - "format": "time_series", - "instant": true, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Tenants", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 
null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Requests / sec\nThe rate of successful, failed and rejected requests to distributor.\nRejected requests are requests that distributor fails to handle because of distributor instance limits.\nWhen distributor is configured to use \"early\" request rejection, then rejected requests are NOT included in other metrics.\nWhen distributor is not configured to use \"early\" request rejection, then rejected requests are also counted as \"errors\".\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": 
"byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 8, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 
4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 9, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": 
"desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Requests / sec\nThe rate of successful, failed and rejected requests to ingester.\nRejected requests are requests that ingester fails to handle because of ingester instance limits (ingester-max-inflight-push-requests, ingester-max-inflight-push-requests-bytes, ingester-max-ingestion-rate).\nWhen ingester is configured to use \"early\" request rejection, then rejected requests are NOT included in other metrics.\nWhen ingester is not configured to use \"early\" request rejection, then rejected requests are also counted as \"errors\".\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } 
- } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 10, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",route=\"/cortex.Ingester/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - 
"defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 11, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 0, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - 
} - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 12, - "links": [ ], - "options": { - "legend": { - "displayMode": "hidden", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "span": 4, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"}[$__rate_interval])))", - "format": "time_series", - "legendFormat": "", - "legendLink": null - } - ], - "title": "Per pod p99 latency", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } 
- } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 13, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 14, - 
"links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor - key-value store for high-availability (HA) deduplication", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - 
"drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 15, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": 
"single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 16, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Distributor - key-value store for distributors ring", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - 
"mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 17, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", - "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" - } - ], - "title": "Requests / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 18, - 
"links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester - key-value store for the ingesters ring", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Uploaded blocks / sec\nThe rate of blocks being uploaded from the ingesters\nto object storage.\n\n", - 
"fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 19, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_shipper_uploads_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) - sum(rate(cortex_ingester_shipper_upload_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_ingester_shipper_upload_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Uploaded blocks / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Upload latency\nThe average, median (50th percentile), and 99th percentile time\nthe ingesters take to upload blocks to object storage.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - 
"spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 20, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Upload latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - 
"repeatRowId": null, - "showTitle": true, - "title": "Ingester - shipper", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Compactions per second\nIngesters maintain a local TSDB per-tenant on disk. Each TSDB maintains a head block for each\nactive time series; these blocks get periodically compacted (by default, every 2h).\nThis panel shows the rate of compaction operations across all TSDBs on all ingesters.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 21, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_tsdb_compactions_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_ingester_tsdb_compactions_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "Compactions / sec", - "type": "timeseries" - }, - { - 
"datasource": "$datasource", - "description": "### Compaction latency\nThe average, median (50th percentile), and 99th percentile time ingesters take to compact TSDB head blocks\non the local filesystem.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ms" - }, - "overrides": [ ] - }, - "id": 22, - "links": [ ], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Compactions latency", - "type": "timeseries", - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - 
"show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester - TSDB head", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### WAL truncations per second\nThe WAL is truncated each time a new TSDB block is written. This panel measures the rate of\ntruncations.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 23, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_tsdb_wal_truncations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) - sum(rate(cortex_ingester_tsdb_wal_truncations_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_ingester_tsdb_wal_truncations_failed_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "failed", - "legendLink": null - } - ], - "title": "WAL truncations / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Checkpoints created per second\nCheckpoints are created as part of the WAL truncation process.\nThis metric measures the rate of checkpoint creation.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "successful" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 24, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_tsdb_checkpoint_creations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) - sum(rate(cortex_ingester_tsdb_checkpoint_creations_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "successful", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_ingester_tsdb_checkpoint_creations_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - 
"legendFormat": "failed", - "legendLink": null - } - ], - "title": "Checkpoints created / sec", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### WAL truncations latency (including checkpointing)\nAverage time taken to perform a full WAL truncation,\nincluding the time taken for the checkpointing to complete.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "id": 25, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_tsdb_wal_truncate_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\n/\nsum(rate(cortex_ingester_tsdb_wal_truncate_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) >= 0\n", - "format": "time_series", - "legendFormat": "avg", - "legendLink": null - } - ], - "title": "WAL truncations latency (includes checkpointing)", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "WAL" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } 
- } - ] - }, - { - "matcher": { - "id": "byName", - "options": "mmap-ed chunks" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E28A42", - "mode": "fixed" - } - } - ] - } - ] - }, - "id": 26, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(rate(cortex_ingester_tsdb_wal_corruptions_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "WAL", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_ingester_tsdb_mmap_chunk_corruptions_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "mmap-ed chunks", - "legendLink": null - } - ], - "title": "Corruptions / sec", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester - TSDB write ahead log (WAL)", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "description": "### Distributor exemplars incoming rate\nThe rate of exemplars that have come in to the distributor, including rejected or deduped exemplars.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ex/s" - }, - "overrides": [ ] - }, - "id": 27, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": 
"sum(cluster_namespace_job:cortex_distributor_exemplars_in:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "incoming exemplars", - "legendLink": null - } - ], - "title": "Distributor exemplars incoming rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Distributor exemplars received rate\nThe rate of received exemplars, excluding rejected and deduped exemplars.\nThis number can be sensibly lower than incoming rate because we dedupe the HA sent exemplars, and then reject based on time, see `cortex_discarded_exemplars_total` for specific reasons rates.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ex/s" - }, - "overrides": [ ] - }, - "id": 28, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", - "format": "time_series", - "legendFormat": "received exemplars", - "legendLink": null - } - ], - "title": "Distributor exemplars received rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Ingester ingested exemplars rate\nThe rate of exemplars ingested in the ingesters.\nEvery exemplar is sent to the replication factor number of ingesters, so the sum of rates from all ingesters is divided by the replication factor.\nThis ingested exemplars rate should match the distributor's received exemplars rate.\n\n", - "fieldConfig": { - "defaults": { - 
"custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ex/s" - }, - "overrides": [ ] - }, - "id": 29, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n cluster_namespace_job:cortex_ingester_ingested_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "ingested exemplars", - "legendLink": null - } - ], - "title": "Ingester ingested exemplars rate", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "description": "### Ingester appended exemplars rate\nThe rate of exemplars appended in the ingesters.\nThis can be lower than ingested exemplars rate since TSDB does not append the same exemplar twice, and those can be frequent.\n\n", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "ex/s" - }, - "overrides": [ ] - }, - "id": 30, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 3, - "targets": [ - { - "expr": "sum(\n cluster_namespace_job:cortex_ingester_tsdb_exemplar_exemplars_appended:rate5m{cluster=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", - "format": "time_series", - "legendFormat": "appended exemplars", - "legendLink": null - } - ], - "title": "Ingester appended exemplars rate", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Exemplars", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 31, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_distributor_instance_rejected_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{reason}}", - "legendLink": null - } - ], - "title": "Rejected distributor requests", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "id": 32, - "links": [ ], - "options": { - 
"legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "targets": [ - { - "expr": "sum by (reason) (rate(cortex_ingester_instance_rejected_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{reason}}", - "legendLink": null - } - ], - "title": "Rejected ingester requests", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Instance Limits", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - 
"10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Mimir / Writes", - "uid": "8280707b8f16e7b87b840fc1cc92d4c5", - "version": 0 - } -kind: ConfigMap +kind: Service metadata: - annotations: - grafana_dashboard_folder: /dashboards/Mimir labels: - grafana_dashboard: "1" - name: mimir-writes.json + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent-cluster namespace: monitoring-system ---- -apiVersion: v1 -data: - MIMIR_S3_SECRET_ACCESS_KEY: VkQ1MzhPWXhTRWlHRDRJOW1tRmZxRk1DR3ExdklpR20= -kind: Secret -metadata: - name: mimir-env-92ddctt858 - namespace: monitoring-system -type: Opaque +spec: + clusterIP: None + ports: + - name: http + port: 80 + protocol: TCP + targetPort: 80 + - name: grpc-otlp + port: 4317 + protocol: TCP + targetPort: 4317 + - name: http-otlp + port: 4318 + protocol: TCP + targetPort: 4318 + - name: zipkin + port: 9411 + protocol: TCP + targetPort: 9411 + - name: jaeger-compact + port: 6831 + protocol: UDP + targetPort: 6831 + selector: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + type: ClusterIP --- apiVersion: v1 kind: Service @@ -43828,1595 +1215,148 @@ spec: - emptyDir: {} name: storage --- -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: agent-flow-mixin-alerts - namespace: monitoring-system -spec: - groups: - - name: clustering - rules: - - alert: ClusterNotConverging - annotations: - message: Cluster is not converging. - expr: stddev by (cluster, namespace) (sum without (state) (cluster_node_peers)) - != 0 - for: 5m - - alert: ClusterSplitBrain - annotations: - message: Cluster nodes have entered a split brain state. 
- expr: | - sum without (state) (cluster_node_peers) != - on (cluster, namespace) group_left - count by (cluster, namespace) (cluster_node_info) - for: 5m - - alert: ClusterLamportClockDrift - annotations: - message: Cluster nodes' lamport clocks are not converging. - expr: stddev by (cluster, namespace) (cluster_node_lamport_time) > 4 * sqrt(count - by (cluster, namespace) (cluster_node_info)) - for: 5m - - alert: ClusterNodeUnhealthy - annotations: - message: Cluster node is reporting a health score > 0. - expr: | - cluster_node_gossip_health_score > 0 - for: 5m - - alert: ClusterLamportClockStuck - annotations: - message: Cluster nodes's lamport clocks is not progressing. - expr: | - sum by (cluster, namespace, instance) (rate(cluster_node_lamport_time[2m])) == 0 - and on (cluster, namespace, instance) (cluster_node_peers > 1) - for: 5m - - alert: ClusterNodeNameConflict - annotations: - message: A node tried to join the cluster with a name conflicting with an - existing peer. - expr: sum by (cluster, namespace) (rate(cluster_node_gossip_received_events_total{event="node_conflict"}[2m])) - > 0 - for: 10m - - alert: ClusterNodeStuckTerminating - annotations: - message: Cluster node stuck in Terminating state. - expr: sum by (cluster, namespace, instance) (cluster_node_peers{state="terminating"}) - > 0 - for: 5m - - alert: ClusterConfigurationDrift - annotations: - message: Cluster nodes are not using the same configuration file. - expr: | - count without (sha256) ( - max by (cluster, namespace, sha256) (agent_config_hash and on(cluster, namespace) cluster_node_info) - ) > 1 - for: 5m - - name: agent_controller - rules: - - alert: SlowComponentEvaluations - annotations: - message: Flow component evaluations are taking too long. - expr: sum by (cluster, namespace, component_id) (rate(agent_component_evaluation_slow_seconds[10m])) - > 0 - for: 15m - - alert: UnhealthyComponents - annotations: - message: Unhealthy Flow components detected. 
- expr: sum(agent_component_controller_running_components{health_type!="healthy"}) - > 0 - for: 15m ---- -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule +apiVersion: apps/v1 +kind: DaemonSet metadata: - name: mimir-mixin-alerts + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent namespace: monitoring-system spec: - groups: - - name: mimir_alerts - rules: - - alert: MimirIngesterUnhealthy - annotations: - message: Mimir cluster {{ $labels.cluster }}/{{ $labels.namespace }} has {{ - printf "%f" $value }} unhealthy ingester(s). - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterunhealthy - expr: | - min by (cluster, namespace) (cortex_ring_members{state="Unhealthy", name="ingester"}) > 0 - for: 15m - labels: - severity: critical - - alert: MimirRequestErrors - annotations: - message: | - The route {{ $labels.route }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% errors. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrequesterrors - expr: | - 100 * sum by (cluster, namespace, job, route) (rate(cortex_request_duration_seconds_count{status_code=~"5..",route!~"ready|debug_pprof"}[1m])) - / - sum by (cluster, namespace, job, route) (rate(cortex_request_duration_seconds_count{route!~"ready|debug_pprof"}[1m])) - > 1 - for: 15m - labels: - severity: critical - - alert: MimirRequestLatency - annotations: - message: | - {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrequestlatency - expr: | - cluster_namespace_job_route:cortex_request_duration_seconds:99quantile{route!~"metrics|/frontend.Frontend/Process|ready|/schedulerpb.SchedulerForFrontend/FrontendLoop|/schedulerpb.SchedulerForQuerier/QuerierLoop|debug_pprof"} - > - 2.5 - for: 15m - labels: - severity: warning - - alert: MimirQueriesIncorrect - annotations: - message: | - The Mimir cluster {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% incorrect query results. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirqueriesincorrect - expr: | - 100 * sum by (cluster, namespace) (rate(test_exporter_test_case_result_total{result="fail"}[5m])) - / - sum by (cluster, namespace) (rate(test_exporter_test_case_result_total[5m])) > 1 - for: 15m - labels: - severity: warning - - alert: MimirInconsistentRuntimeConfig - annotations: - message: | - An inconsistent runtime config file is used across cluster {{ $labels.cluster }}/{{ $labels.namespace }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirinconsistentruntimeconfig - expr: | - count(count by(cluster, namespace, job, sha256) (cortex_runtime_config_hash)) without(sha256) > 1 - for: 1h - labels: - severity: critical - - alert: MimirBadRuntimeConfig - annotations: - message: | - {{ $labels.job }} failed to reload runtime config. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirbadruntimeconfig - expr: | - # The metric value is reset to 0 on error while reloading the config at runtime. - cortex_runtime_config_last_reload_successful == 0 - for: 5m - labels: - severity: critical - - alert: MimirFrontendQueriesStuck - annotations: - message: | - There are {{ $value }} queued up queries in {{ $labels.cluster }}/{{ $labels.namespace }} {{ $labels.job }}. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirfrontendqueriesstuck - expr: | - sum by (cluster, namespace, job) (min_over_time(cortex_query_frontend_queue_length[1m])) > 0 - for: 5m - labels: - severity: critical - - alert: MimirSchedulerQueriesStuck - annotations: - message: | - There are {{ $value }} queued up queries in {{ $labels.cluster }}/{{ $labels.namespace }} {{ $labels.job }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirschedulerqueriesstuck - expr: | - sum by (cluster, namespace, job) (min_over_time(cortex_query_scheduler_queue_length[1m])) > 0 - for: 7m - labels: - severity: critical - - alert: MimirCacheRequestErrors - annotations: - message: | - The cache {{ $labels.name }} used by Mimir {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% errors for {{ $labels.operation }} operation. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircacherequesterrors - expr: | - ( - sum by(cluster, namespace, name, operation) ( - rate(thanos_memcached_operation_failures_total[1m]) - or - rate(thanos_cache_operation_failures_total[1m]) - ) - / - sum by(cluster, namespace, name, operation) ( - rate(thanos_memcached_operations_total[1m]) - or - rate(thanos_cache_operations_total[1m]) - ) - ) * 100 > 5 - for: 5m - labels: - severity: warning - - alert: MimirIngesterRestarts - annotations: - message: Mimir {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has restarted {{ printf "%.2f" $value }} times in the last 30 mins. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterrestarts - expr: | - ( - sum by(cluster, namespace, pod) ( - increase(kube_pod_container_status_restarts_total{container=~"(ingester|mimir-write)"}[30m]) - ) - >= 2 - ) - and - ( - count by(cluster, namespace, pod) (cortex_build_info) > 0 - ) - labels: - severity: warning - - alert: MimirKVStoreFailure - annotations: - message: | - Mimir {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is failing to talk to the KV store {{ $labels.kv_name }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirkvstorefailure - expr: | - ( - sum by(cluster, namespace, pod, status_code, kv_name) (rate(cortex_kv_request_duration_seconds_count{status_code!~"2.+"}[1m])) - / - sum by(cluster, namespace, pod, status_code, kv_name) (rate(cortex_kv_request_duration_seconds_count[1m])) - ) - # We want to get alerted only in case there's a constant failure. - == 1 - for: 5m - labels: - severity: critical - - alert: MimirMemoryMapAreasTooHigh - annotations: - message: '{{ $labels.job }}/{{ $labels.pod }} has a number of mmap-ed areas - close to the limit.' - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirmemorymapareastoohigh - expr: | - process_memory_map_areas{job=~".*/(ingester.*|cortex|mimir|mimir-write.*|store-gateway.*|cortex|mimir|mimir-backend.*)"} / process_memory_map_areas_limit{job=~".*/(ingester.*|cortex|mimir|mimir-write.*|store-gateway.*|cortex|mimir|mimir-backend.*)"} > 0.8 - for: 5m - labels: - severity: critical - - alert: MimirIngesterInstanceHasNoTenants - annotations: - message: Mimir ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has no tenants assigned. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterinstancehasnotenants - expr: | - (min by(cluster, namespace, pod) (cortex_ingester_memory_users) == 0) - and on (cluster, namespace) - # Only if there are more time-series than would be expected due to continuous testing load - ( - sum by(cluster, namespace) (cortex_ingester_memory_series) - / - max by(cluster, namespace) (cortex_distributor_replication_factor) - ) > 100000 - for: 1h - labels: - severity: warning - - alert: MimirRulerInstanceHasNoRuleGroups - annotations: - message: Mimir ruler {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has no rule groups assigned. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulerinstancehasnorulegroups - expr: | - # Alert on ruler instances in microservices mode that have no rule groups assigned, - min by(cluster, namespace, pod) (cortex_ruler_managers_total{pod=~"(.*mimir-)?ruler.*"}) == 0 - # but only if other ruler instances of the same cell do have rule groups assigned - and on (cluster, namespace) - (max by(cluster, namespace) (cortex_ruler_managers_total) > 0) - # and there are more than two instances overall - and on (cluster, namespace) - (count by (cluster, namespace) (cortex_ruler_managers_total) > 2) - for: 1h - labels: - severity: warning - - alert: MimirIngestedDataTooFarInTheFuture - annotations: - message: Mimir ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has ingested samples with timestamps more than 1h in the future. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesteddatatoofarinthefuture - expr: | - max by(cluster, namespace, pod) ( - cortex_ingester_tsdb_head_max_timestamp_seconds - time() - and - cortex_ingester_tsdb_head_max_timestamp_seconds > 0 - ) > 60*60 - for: 5m - labels: - severity: warning - - alert: MimirRingMembersMismatch - annotations: - message: | - Number of members in Mimir ingester hash ring does not match the expected number in {{ $labels.cluster }}/{{ $labels.namespace }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirringmembersmismatch - expr: | - ( - avg by(cluster, namespace) (sum by(cluster, namespace, pod) (cortex_ring_members{name="ingester",job=~".*/(ingester.*|cortex|mimir|mimir-write.*)"})) - != sum by(cluster, namespace) (up{job=~".*/(ingester.*|cortex|mimir|mimir-write.*)"}) - ) - and - ( - count by(cluster, namespace) (cortex_build_info) > 0 - ) - for: 15m - labels: - component: ingester - severity: warning - - name: mimir_instance_limits_alerts - rules: - - alert: MimirIngesterReachingSeriesLimit - annotations: - message: | - Ingester {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its series limit. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterreachingserieslimit - expr: | - ( - (cortex_ingester_memory_series / ignoring(limit) cortex_ingester_instance_limits{limit="max_series"}) - and ignoring (limit) - (cortex_ingester_instance_limits{limit="max_series"} > 0) - ) > 0.8 - for: 3h - labels: - severity: warning - - alert: MimirIngesterReachingSeriesLimit - annotations: - message: | - Ingester {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its series limit. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterreachingserieslimit - expr: | - ( - (cortex_ingester_memory_series / ignoring(limit) cortex_ingester_instance_limits{limit="max_series"}) - and ignoring (limit) - (cortex_ingester_instance_limits{limit="max_series"} > 0) - ) > 0.9 - for: 5m - labels: - severity: critical - - alert: MimirIngesterReachingTenantsLimit - annotations: - message: | - Ingester {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its tenant limit. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterreachingtenantslimit - expr: | - ( - (cortex_ingester_memory_users / ignoring(limit) cortex_ingester_instance_limits{limit="max_tenants"}) - and ignoring (limit) - (cortex_ingester_instance_limits{limit="max_tenants"} > 0) - ) > 0.7 - for: 5m - labels: - severity: warning - - alert: MimirIngesterReachingTenantsLimit - annotations: - message: | - Ingester {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its tenant limit. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterreachingtenantslimit - expr: | - ( - (cortex_ingester_memory_users / ignoring(limit) cortex_ingester_instance_limits{limit="max_tenants"}) - and ignoring (limit) - (cortex_ingester_instance_limits{limit="max_tenants"} > 0) - ) > 0.8 - for: 5m - labels: - severity: critical - - alert: MimirReachingTCPConnectionsLimit - annotations: - message: | - Mimir instance {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its TCP connections limit for {{ $labels.protocol }} protocol. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirreachingtcpconnectionslimit - expr: | - cortex_tcp_connections / cortex_tcp_connections_limit > 0.8 and - cortex_tcp_connections_limit > 0 - for: 5m - labels: - severity: critical - - alert: MimirDistributorReachingInflightPushRequestLimit - annotations: - message: | - Distributor {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its inflight push request limit. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirdistributorreachinginflightpushrequestlimit - expr: | - ( - (cortex_distributor_inflight_push_requests / ignoring(limit) cortex_distributor_instance_limits{limit="max_inflight_push_requests"}) - and ignoring (limit) - (cortex_distributor_instance_limits{limit="max_inflight_push_requests"} > 0) - ) > 0.8 - for: 5m - labels: - severity: critical - - name: mimir-rollout-alerts - rules: - - alert: MimirRolloutStuck - annotations: - message: | - The {{ $labels.rollout_group }} rollout is stuck in {{ $labels.cluster }}/{{ $labels.namespace }}. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrolloutstuck - expr: | - ( - max without (revision) ( - sum without(statefulset) (label_replace(kube_statefulset_status_current_revision, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?")) - unless - sum without(statefulset) (label_replace(kube_statefulset_status_update_revision, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?")) - ) - * - ( - sum without(statefulset) (label_replace(kube_statefulset_replicas, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?")) - != - sum without(statefulset) (label_replace(kube_statefulset_status_replicas_updated, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?")) - ) - ) and ( - changes(sum without(statefulset) (label_replace(kube_statefulset_status_replicas_updated, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?"))[15m:1m]) - == - 0 - ) - * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - for: 30m - labels: - severity: warning - workload_type: statefulset - - alert: MimirRolloutStuck - annotations: - message: | - The {{ $labels.rollout_group }} rollout is stuck in {{ $labels.cluster }}/{{ $labels.namespace }}. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrolloutstuck - expr: | - ( - sum without(deployment) (label_replace(kube_deployment_spec_replicas, "rollout_group", "$1", "deployment", "(.*?)(?:-zone-[a-z])?")) - != - sum without(deployment) (label_replace(kube_deployment_status_replicas_updated, "rollout_group", "$1", "deployment", "(.*?)(?:-zone-[a-z])?")) - ) and ( - changes(sum without(deployment) (label_replace(kube_deployment_status_replicas_updated, "rollout_group", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"))[15m:1m]) - == - 0 - ) - * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - for: 30m - labels: - severity: warning - workload_type: deployment - - alert: RolloutOperatorNotReconciling - annotations: - message: | - Rollout operator is not reconciling the rollout group {{ $labels.rollout_group }} in {{ $labels.cluster }}/{{ $labels.namespace }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#rolloutoperatornotreconciling - expr: | - max by(cluster, namespace, rollout_group) (time() - rollout_operator_last_successful_group_reconcile_timestamp_seconds) > 600 - for: 5m - labels: - severity: critical - - name: mimir-provisioning - rules: - - alert: MimirAllocatingTooMuchMemory - annotations: - message: | - Instance {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is using too much memory. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirallocatingtoomuchmemory - expr: | - ( - # We use RSS instead of working set memory because of the ingester's extensive usage of mmap. - # See: https://github.com/grafana/mimir/issues/2466 - container_memory_rss{container=~"(ingester|mimir-write|mimir-backend)"} - / - ( container_spec_memory_limit_bytes{container=~"(ingester|mimir-write|mimir-backend)"} > 0 ) - ) - # Match only Mimir namespaces. 
- * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - > 0.65 - for: 15m - labels: - severity: warning - - alert: MimirAllocatingTooMuchMemory - annotations: - message: | - Instance {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is using too much memory. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirallocatingtoomuchmemory - expr: | - ( - # We use RSS instead of working set memory because of the ingester's extensive usage of mmap. - # See: https://github.com/grafana/mimir/issues/2466 - container_memory_rss{container=~"(ingester|mimir-write|mimir-backend)"} - / - ( container_spec_memory_limit_bytes{container=~"(ingester|mimir-write|mimir-backend)"} > 0 ) - ) - # Match only Mimir namespaces. - * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - > 0.8 - for: 15m - labels: - severity: critical - - name: ruler_alerts - rules: - - alert: MimirRulerTooManyFailedPushes - annotations: - message: | - Mimir Ruler {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% write (push) errors. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulertoomanyfailedpushes - expr: | - 100 * ( - sum by (cluster, namespace, pod) (rate(cortex_ruler_write_requests_failed_total[1m])) - / - sum by (cluster, namespace, pod) (rate(cortex_ruler_write_requests_total[1m])) - ) > 1 - for: 5m - labels: - severity: critical - - alert: MimirRulerTooManyFailedQueries - annotations: - message: | - Mimir Ruler {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% errors while evaluating rules. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulertoomanyfailedqueries - expr: | - 100 * ( - sum by (cluster, namespace, pod) (rate(cortex_ruler_queries_failed_total[1m])) - / - sum by (cluster, namespace, pod) (rate(cortex_ruler_queries_total[1m])) - ) > 1 - for: 5m - labels: - severity: critical - - alert: MimirRulerMissedEvaluations - annotations: - message: | - Mimir Ruler {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% missed iterations for the rule group {{ $labels.rule_group }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulermissedevaluations - expr: | - 100 * ( - sum by (cluster, namespace, pod, rule_group) (rate(cortex_prometheus_rule_group_iterations_missed_total[1m])) - / - sum by (cluster, namespace, pod, rule_group) (rate(cortex_prometheus_rule_group_iterations_total[1m])) - ) > 1 - for: 5m - labels: - severity: warning - - alert: MimirRulerFailedRingCheck - annotations: - message: | - Mimir Rulers in {{ $labels.cluster }}/{{ $labels.namespace }} are experiencing errors when checking the ring for rule group ownership. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulerfailedringcheck - expr: | - sum by (cluster, namespace, job) (rate(cortex_ruler_ring_check_errors_total[1m])) - > 0 - for: 5m - labels: - severity: critical - - alert: MimirRulerRemoteEvaluationFailing - annotations: - message: | - Mimir rulers in {{ $labels.cluster }}/{{ $labels.namespace }} are failing to perform {{ printf "%.2f" $value }}% of remote evaluations through the ruler-query-frontend. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulerremoteevaluationfailing - expr: | - 100 * ( - sum by (cluster, namespace) (rate(cortex_request_duration_seconds_count{route="/httpgrpc.HTTP/Handle", status_code=~"5..", job=~".*/(ruler-query-frontend.*)"}[5m])) - / - sum by (cluster, namespace) (rate(cortex_request_duration_seconds_count{route="/httpgrpc.HTTP/Handle", job=~".*/(ruler-query-frontend.*)"}[5m])) - ) > 1 - for: 5m - labels: - severity: warning - - name: gossip_alerts - rules: - - alert: MimirGossipMembersTooHigh - annotations: - message: One or more Mimir instances in {{ $labels.cluster }}/{{ $labels.namespace - }} consistently sees a higher than expected number of gossip members. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirgossipmemberstoohigh - expr: | - max by (cluster, namespace) (memberlist_client_cluster_members_count) - > - (sum by (cluster, namespace) (up{job=~".+/(admin-api|alertmanager|compactor.*|distributor|ingester.*|querier.*|ruler|ruler-querier.*|store-gateway.*|cortex|mimir|mimir-write.*|mimir-read.*|mimir-backend.*)"}) + 10) - for: 20m - labels: - severity: warning - - alert: MimirGossipMembersTooLow - annotations: - message: One or more Mimir instances in {{ $labels.cluster }}/{{ $labels.namespace - }} consistently sees a lower than expected number of gossip members. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirgossipmemberstoolow - expr: | - min by (cluster, namespace) (memberlist_client_cluster_members_count) - < - (sum by (cluster, namespace) (up{job=~".+/(admin-api|alertmanager|compactor.*|distributor|ingester.*|querier.*|ruler|ruler-querier.*|store-gateway.*|cortex|mimir|mimir-write.*|mimir-read.*|mimir-backend.*)"}) * 0.5) - for: 20m - labels: - severity: warning - - name: etcd_alerts - rules: - - alert: EtcdAllocatingTooMuchMemory - annotations: - message: | - Too much memory being used by {{ $labels.namespace }}/{{ $labels.pod }} - bump memory limit. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#etcdallocatingtoomuchmemory - expr: | - ( - container_memory_working_set_bytes{container="etcd"} - / - ( container_spec_memory_limit_bytes{container="etcd"} > 0 ) - ) > 0.65 - for: 15m - labels: - severity: warning - - alert: EtcdAllocatingTooMuchMemory - annotations: - message: | - Too much memory being used by {{ $labels.namespace }}/{{ $labels.pod }} - bump memory limit. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#etcdallocatingtoomuchmemory - expr: | - ( - container_memory_working_set_bytes{container="etcd"} - / - ( container_spec_memory_limit_bytes{container="etcd"} > 0 ) - ) > 0.8 - for: 15m - labels: - severity: critical - - name: alertmanager_alerts - rules: - - alert: MimirAlertmanagerSyncConfigsFailing - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is failing to read tenant configurations from storage. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagersyncconfigsfailing - expr: | - rate(cortex_alertmanager_sync_configs_failed_total[5m]) > 0 - for: 30m - labels: - severity: critical - - alert: MimirAlertmanagerRingCheckFailing - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is unable to check tenants ownership via the ring. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerringcheckfailing - expr: | - rate(cortex_alertmanager_ring_check_errors_total[2m]) > 0 - for: 10m - labels: - severity: critical - - alert: MimirAlertmanagerPartialStateMergeFailing - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is failing to merge partial state changes received from a replica. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerpartialstatemergefailing - expr: | - rate(cortex_alertmanager_partial_state_merges_failed_total[2m]) > 0 - for: 10m - labels: - severity: critical - - alert: MimirAlertmanagerReplicationFailing - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is failing to replicating partial state to its replicas. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerreplicationfailing - expr: | - rate(cortex_alertmanager_state_replication_failed_total[2m]) > 0 - for: 10m - labels: - severity: critical - - alert: MimirAlertmanagerPersistStateFailing - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is unable to persist full state snaphots to remote storage. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerpersiststatefailing - expr: | - rate(cortex_alertmanager_state_persist_failed_total[15m]) > 0 - for: 1h - labels: - severity: critical - - alert: MimirAlertmanagerInitialSyncFailed - annotations: - message: | - Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} was unable to obtain some initial state when starting up. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerinitialsyncfailed - expr: | - increase(cortex_alertmanager_state_initial_sync_completed_total{outcome="failed"}[1m]) > 0 - labels: - severity: critical - - alert: MimirAlertmanagerAllocatingTooMuchMemory - annotations: - message: | - Alertmanager {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is using too much memory. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerallocatingtoomuchmemory - expr: | - (container_memory_working_set_bytes{container="alertmanager"} / container_spec_memory_limit_bytes{container="alertmanager"}) > 0.80 - and - (container_spec_memory_limit_bytes{container="alertmanager"} > 0) - for: 15m - labels: - severity: warning - - alert: MimirAlertmanagerAllocatingTooMuchMemory - annotations: - message: | - Alertmanager {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is using too much memory. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerallocatingtoomuchmemory - expr: | - (container_memory_working_set_bytes{container="alertmanager"} / container_spec_memory_limit_bytes{container="alertmanager"}) > 0.90 - and - (container_spec_memory_limit_bytes{container="alertmanager"} > 0) - for: 15m - labels: - severity: critical - - alert: MimirAlertmanagerInstanceHasNoTenants - annotations: - message: Mimir alertmanager {{ $labels.pod }} in {{ $labels.cluster }}/{{ - $labels.namespace }} owns no tenants. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerinstancehasnotenants - expr: | - # Alert on alertmanager instances in microservices mode that own no tenants, - min by(cluster, namespace, pod) (cortex_alertmanager_tenants_owned{pod=~"(.*mimir-)?alertmanager.*"}) == 0 - # but only if other instances of the same cell do have tenants assigned. - and on (cluster, namespace) - max by(cluster, namespace) (cortex_alertmanager_tenants_owned) > 0 - for: 1h - labels: - severity: warning - - name: mimir_blocks_alerts - rules: - - alert: MimirIngesterHasNotShippedBlocks - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not shipped any block in the last 4 hours. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterhasnotshippedblocks - expr: | - (min by(cluster, namespace, pod) (time() - cortex_ingester_shipper_last_successful_upload_timestamp_seconds) > 60 * 60 * 4) - and - (max by(cluster, namespace, pod) (cortex_ingester_shipper_last_successful_upload_timestamp_seconds) > 0) - and - # Only if the ingester has ingested samples over the last 4h. - (max by(cluster, namespace, pod) (max_over_time(cluster_namespace_pod:cortex_ingester_ingested_samples_total:rate1m[4h])) > 0) - and - # Only if the ingester was ingesting samples 4h ago. 
This protects against the case where the ingester replica - # had ingested samples in the past, then no traffic was received for a long period and then it starts - # receiving samples again. Without this check, the alert would fire as soon as it gets back receiving - # samples, while the a block shipping is expected within the next 4h. - (max by(cluster, namespace, pod) (max_over_time(cluster_namespace_pod:cortex_ingester_ingested_samples_total:rate1m[1h] offset 4h)) > 0) - for: 15m - labels: - severity: critical - - alert: MimirIngesterHasNotShippedBlocksSinceStart - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not shipped any block in the last 4 hours. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterhasnotshippedblockssincestart - expr: | - (max by(cluster, namespace, pod) (cortex_ingester_shipper_last_successful_upload_timestamp_seconds) == 0) - and - (max by(cluster, namespace, pod) (max_over_time(cluster_namespace_pod:cortex_ingester_ingested_samples_total:rate1m[4h])) > 0) - for: 4h - labels: - severity: critical - - alert: MimirIngesterHasUnshippedBlocks - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has compacted a block {{ $value | humanizeDuration }} ago but it hasn't - been successfully uploaded to the storage yet. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterhasunshippedblocks - expr: | - (time() - cortex_ingester_oldest_unshipped_block_timestamp_seconds > 3600) - and - (cortex_ingester_oldest_unshipped_block_timestamp_seconds > 0) - for: 15m - labels: - severity: critical - - alert: MimirIngesterTSDBHeadCompactionFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to compact TSDB head. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbheadcompactionfailed - expr: | - rate(cortex_ingester_tsdb_compactions_failed_total[5m]) > 0 - for: 15m - labels: - severity: critical - - alert: MimirIngesterTSDBHeadTruncationFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to truncate TSDB head. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbheadtruncationfailed - expr: | - rate(cortex_ingester_tsdb_head_truncations_failed_total[5m]) > 0 - labels: - severity: critical - - alert: MimirIngesterTSDBCheckpointCreationFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to create TSDB checkpoint. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbcheckpointcreationfailed - expr: | - rate(cortex_ingester_tsdb_checkpoint_creations_failed_total[5m]) > 0 - labels: - severity: critical - - alert: MimirIngesterTSDBCheckpointDeletionFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to delete TSDB checkpoint. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbcheckpointdeletionfailed - expr: | - rate(cortex_ingester_tsdb_checkpoint_deletions_failed_total[5m]) > 0 - labels: - severity: critical - - alert: MimirIngesterTSDBWALTruncationFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to truncate TSDB WAL. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbwaltruncationfailed - expr: | - rate(cortex_ingester_tsdb_wal_truncations_failed_total[5m]) > 0 - labels: - severity: warning - - alert: MimirIngesterTSDBWALCorrupted - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} got a corrupted TSDB WAL. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbwalcorrupted - expr: | - # alert when there are more than one corruptions - count by (cluster, namespace) (rate(cortex_ingester_tsdb_wal_corruptions_total[5m]) > 0) > 1 - and - # and there is only one zone - count by (cluster, namespace) (group by (cluster, namespace, job) (cortex_ingester_tsdb_wal_corruptions_total)) == 1 - labels: - deployment: single-zone - severity: critical - - alert: MimirIngesterTSDBWALCorrupted - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} got a corrupted TSDB WAL. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbwalcorrupted - expr: | - # alert when there are more than one corruptions - count by (cluster, namespace) (sum by (cluster, namespace, job) (rate(cortex_ingester_tsdb_wal_corruptions_total[5m]) > 0)) > 1 - and - # and there are multiple zones - count by (cluster, namespace) (group by (cluster, namespace, job) (cortex_ingester_tsdb_wal_corruptions_total)) > 1 - labels: - deployment: multi-zone - severity: critical - - alert: MimirIngesterTSDBWALWritesFailed - annotations: - message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} is failing to write to TSDB WAL. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbwalwritesfailed - expr: | - rate(cortex_ingester_tsdb_wal_writes_failed_total[1m]) > 0 - for: 3m - labels: - severity: critical - - alert: MimirStoreGatewayHasNotSyncTheBucket - annotations: - message: Mimir store-gateway {{ $labels.pod }} in {{ $labels.cluster }}/{{ - $labels.namespace }} has not successfully synched the bucket since {{ $value - | humanizeDuration }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirstoregatewayhasnotsyncthebucket - expr: | - (time() - cortex_bucket_stores_blocks_last_successful_sync_timestamp_seconds{component="store-gateway"} > 60 * 30) - and - cortex_bucket_stores_blocks_last_successful_sync_timestamp_seconds{component="store-gateway"} > 0 - for: 5m - labels: - severity: critical - - alert: MimirStoreGatewayNoSyncedTenants - annotations: - message: Mimir store-gateway {{ $labels.pod }} in {{ $labels.cluster }}/{{ - $labels.namespace }} is not syncing any blocks for any tenant. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirstoregatewaynosyncedtenants - expr: | - min by(cluster, namespace, pod) (cortex_bucket_stores_tenants_synced{component="store-gateway"}) == 0 - for: 1h - labels: - severity: warning - - alert: MimirBucketIndexNotUpdated - annotations: - message: Mimir bucket index for tenant {{ $labels.user }} in {{ $labels.cluster - }}/{{ $labels.namespace }} has not been updated since {{ $value | humanizeDuration - }}. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirbucketindexnotupdated - expr: | - min by(cluster, namespace, user) (time() - cortex_bucket_index_last_successful_update_timestamp_seconds) > 7200 - labels: - severity: critical - - name: mimir_compactor_alerts - rules: - - alert: MimirCompactorHasNotSuccessfullyCleanedUpBlocks - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not successfully cleaned up blocks in the last 6 hours. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotsuccessfullycleanedupblocks - expr: | - # The "last successful run" metric is updated even if the compactor owns no tenants, - # so this alert correctly doesn't fire if compactor has nothing to do. - (time() - cortex_compactor_block_cleanup_last_successful_run_timestamp_seconds > 60 * 60 * 6) - for: 1h - labels: - severity: critical - - alert: MimirCompactorHasNotSuccessfullyRunCompaction - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not run compaction in the last 24 hours. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotsuccessfullyruncompaction - expr: | - # The "last successful run" metric is updated even if the compactor owns no tenants, - # so this alert correctly doesn't fire if compactor has nothing to do. - (time() - cortex_compactor_last_successful_run_timestamp_seconds > 60 * 60 * 24) - and - (cortex_compactor_last_successful_run_timestamp_seconds > 0) - for: 1h - labels: - reason: in-last-24h - severity: critical - - alert: MimirCompactorHasNotSuccessfullyRunCompaction - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not run compaction in the last 24 hours. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotsuccessfullyruncompaction - expr: | - # The "last successful run" metric is updated even if the compactor owns no tenants, - # so this alert correctly doesn't fire if compactor has nothing to do. - cortex_compactor_last_successful_run_timestamp_seconds == 0 - for: 24h - labels: - reason: since-startup - severity: critical - - alert: MimirCompactorHasNotSuccessfullyRunCompaction - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} failed to run 2 consecutive compactions. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotsuccessfullyruncompaction - expr: | - increase(cortex_compactor_runs_failed_total{reason!="shutdown"}[2h]) >= 2 - labels: - reason: consecutive-failures - severity: critical - - alert: MimirCompactorHasNotUploadedBlocks - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not uploaded any block in the last 24 hours. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotuploadedblocks - expr: | - (time() - (max by(cluster, namespace, pod) (thanos_objstore_bucket_last_successful_upload_time{component="compactor"})) > 60 * 60 * 24) - and - (max by(cluster, namespace, pod) (thanos_objstore_bucket_last_successful_upload_time{component="compactor"}) > 0) - and - # Only if some compactions have started. We don't want to fire this alert if the compactor has nothing to do - # (e.g. there are more replicas than required because running as part of mimir-backend). 
- (sum by(cluster, namespace, pod) (rate(cortex_compactor_group_compaction_runs_started_total[24h])) > 0) - for: 15m - labels: - severity: critical - time_period: 24h - - alert: MimirCompactorHasNotUploadedBlocks - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not uploaded any block since its start. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotuploadedblocks - expr: | - (max by(cluster, namespace, pod) (thanos_objstore_bucket_last_successful_upload_time{component="compactor"}) == 0) - and - # Only if some compactions have started. We don't want to fire this alert if the compactor has nothing to do - # (e.g. there are more replicas than required because running as part of mimir-backend). - (sum by(cluster, namespace, pod) (rate(cortex_compactor_group_compaction_runs_started_total[24h])) > 0) - for: 24h - labels: - severity: critical - time_period: since-start - - alert: MimirCompactorSkippedUnhealthyBlocks - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has found and ignored unhealthy blocks. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorskippedunhealthyblocks - expr: | - increase(cortex_compactor_blocks_marked_for_no_compaction_total[5m]) > 0 - for: 1m - labels: - severity: warning - - alert: MimirCompactorSkippedUnhealthyBlocks - annotations: - message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has found and ignored unhealthy blocks. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorskippedunhealthyblocks - expr: | - increase(cortex_compactor_blocks_marked_for_no_compaction_total[5m]) > 1 - for: 30m - labels: - severity: critical - - name: mimir_autoscaling - rules: - - alert: MimirAutoscalerNotActive - annotations: - message: The Horizontal Pod Autoscaler (HPA) {{ $labels.horizontalpodautoscaler - }} in {{ $labels.namespace }} is not active. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirautoscalernotactive - expr: | - ( - label_replace(( - kube_horizontalpodautoscaler_status_condition{condition="ScalingActive",status="false"} - # Match only Mimir namespaces. - * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - # Add "metric" label. - + on(cluster, namespace, horizontalpodautoscaler) group_right label_replace(kube_horizontalpodautoscaler_spec_target_metric*0, "metric", "$1", "metric_name", "(.+)") - > 0), - "scaledObject", "$1", "horizontalpodautoscaler", "keda-hpa-(.*)" - ) - ) - # Alert only if the scaling metric exists and is > 0. If the KEDA ScaledObject is configured to scale down 0, - # then HPA ScalingActive may be false when expected to run 0 replicas. In this case, the scaling metric exported - # by KEDA could not exist at all or being exposed with a value of 0. - and on (cluster, namespace, metric, scaledObject) - (label_replace(keda_scaler_metrics_value, "namespace", "$0", "exported_namespace", ".+") > 0) - for: 1h - labels: - severity: critical - - alert: MimirAutoscalerKedaFailing - annotations: - message: The Keda ScaledObject {{ $labels.scaledObject }} in {{ $labels.namespace - }} is experiencing errors. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirautoscalerkedafailing - expr: | - ( - # Find KEDA scalers reporting errors. 
- label_replace(rate(keda_scaler_errors[5m]), "namespace", "$1", "exported_namespace", "(.*)") - # Match only Mimir namespaces. - * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) - ) - > 0 - for: 1h - labels: - severity: critical - - name: mimir_continuous_test - rules: - - alert: MimirContinuousTestNotRunningOnWrites - annotations: - message: Mimir continuous test {{ $labels.test }} in {{ $labels.cluster }}/{{ - $labels.namespace }} is not effectively running because writes are failing. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircontinuoustestnotrunningonwrites - expr: | - sum by(cluster, namespace, test) (rate(mimir_continuous_test_writes_failed_total[5m])) > 0 - for: 1h - labels: - severity: warning - - alert: MimirContinuousTestNotRunningOnReads - annotations: - message: Mimir continuous test {{ $labels.test }} in {{ $labels.cluster }}/{{ - $labels.namespace }} is not effectively running because queries are failing. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircontinuoustestnotrunningonreads - expr: | - sum by(cluster, namespace, test) (rate(mimir_continuous_test_queries_failed_total[5m])) > 0 - for: 1h - labels: - severity: warning - - alert: MimirContinuousTestFailed + minReadySeconds: 10 + selector: + matchLabels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + template: + metadata: annotations: - message: Mimir continuous test {{ $labels.test }} in {{ $labels.cluster }}/{{ - $labels.namespace }} failed when asserting query results. 
- runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircontinuoustestfailed - expr: | - sum by(cluster, namespace, test) (rate(mimir_continuous_test_query_result_checks_failed_total[10m])) > 0 - labels: - severity: warning + kubectl.kubernetes.io/default-container: grafana-agent + logs.agent.grafana.com/scrape: "true" + logs.agent.grafana.com/scrub-level: debug + profiles.grafana.com/cpu.port_name: http-metrics + profiles.grafana.com/cpu.scrape: "false" + profiles.grafana.com/goroutine.port_name: http-metrics + profiles.grafana.com/goroutine.scrape: "false" + profiles.grafana.com/memory.port_name: http-metrics + profiles.grafana.com/memory.scrape: "false" + pyroscope.io/service_name: grafana-agent + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent + spec: + containers: + - args: + - run + - /etc/agent/config.river + - --storage.path=/tmp/agent + - --server.http.listen-addr=0.0.0.0:80 + - --server.http.ui-path-prefix=/ + - --disable-reporting + - --cluster.enabled=true + - --cluster.join-addresses=grafana-agent-cluster + env: + - name: AGENT_MODE + value: flow + - name: AGENT_DEPLOY_MODE + value: helm + - name: HOSTNAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + envFrom: + - secretRef: + name: agent-env + optional: true + image: docker.io/grafana/agent:v0.40.3 + imagePullPolicy: IfNotPresent + name: grafana-agent + ports: + - containerPort: 80 + name: http-metrics + - containerPort: 4317 + name: grpc-otlp + protocol: TCP + - containerPort: 4318 + name: http-otlp + protocol: TCP + - containerPort: 9411 + name: zipkin + protocol: TCP + - containerPort: 6831 + name: jaeger-compact + protocol: UDP + readinessProbe: + httpGet: + path: /-/ready + port: 80 + scheme: HTTP + initialDelaySeconds: 10 + timeoutSeconds: 1 + volumeMounts: + - mountPath: /etc/agent + name: config + - mountPath: /var/log + name: varlog + readOnly: true + - mountPath: /etc/agent-modules + name: agent-modules + - 
args: + - --volume-dir=/etc/agent + - --webhook-url=http://localhost:80/-/reload + image: ghcr.io/jimmidyson/configmap-reload:v0.12.0 + name: config-reloader + resources: + requests: + cpu: 1m + memory: 5Mi + volumeMounts: + - mountPath: /etc/agent + name: config + dnsPolicy: ClusterFirst + nodeSelector: + kubernetes.io/os: linux + serviceAccountName: grafana-agent + volumes: + - configMap: + name: agent-config-9cc7gk9k2b + name: config + - hostPath: + path: /var/log + name: varlog + - configMap: + name: agent-modules-cf8t5bf7t9 + name: agent-modules + updateStrategy: + rollingUpdate: + maxSurge: 0 + maxUnavailable: 2 + type: RollingUpdate --- apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule +kind: ServiceMonitor metadata: - name: mimir-mixin-rules + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent namespace: monitoring-system spec: - groups: - - name: mimir_api_1 - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_request_duration_seconds:50quantile - - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job) / - sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job) - record: cluster_job:cortex_request_duration_seconds:avg - - expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, - job) - record: cluster_job:cortex_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job) - record: cluster_job:cortex_request_duration_seconds_sum:sum_rate - - expr: 
sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job) - record: cluster_job:cortex_request_duration_seconds_count:sum_rate - - expr: sum(rate(cortex_request_duration_seconds[1m])) by (cluster, job) - record: cluster_job:cortex_request_duration_seconds:sum_rate - - name: mimir_api_2 - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, job, route)) - record: cluster_job_route:cortex_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, job, route)) - record: cluster_job_route:cortex_request_duration_seconds:50quantile - - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job, route) - / sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job, route) - record: cluster_job_route:cortex_request_duration_seconds:avg - - expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, - job, route) - record: cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job, route) - record: cluster_job_route:cortex_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job, - route) - record: cluster_job_route:cortex_request_duration_seconds_count:sum_rate - - expr: sum(rate(cortex_request_duration_seconds[1m])) by (cluster, job, route) - record: cluster_job_route:cortex_request_duration_seconds:sum_rate - - name: mimir_api_3 - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, namespace, job, route)) - record: cluster_namespace_job_route:cortex_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) - by (le, cluster, namespace, job, route)) - record: 
cluster_namespace_job_route:cortex_request_duration_seconds:50quantile - - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, namespace, - job, route) / sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, - namespace, job, route) - record: cluster_namespace_job_route:cortex_request_duration_seconds:avg - - expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, - namespace, job, route) - record: cluster_namespace_job_route:cortex_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, namespace, - job, route) - record: cluster_namespace_job_route:cortex_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, namespace, - job, route) - record: cluster_namespace_job_route:cortex_request_duration_seconds_count:sum_rate - - expr: sum(rate(cortex_request_duration_seconds[1m])) by (cluster, namespace, - job, route) - record: cluster_namespace_job_route:cortex_request_duration_seconds:sum_rate - - name: mimir_querier_api - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_querier_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_querier_request_duration_seconds:50quantile - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - job) / sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, - job) - record: cluster_job:cortex_querier_request_duration_seconds:avg - - expr: sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, - cluster, job) - record: cluster_job:cortex_querier_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - job) 
- record: cluster_job:cortex_querier_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, - job) - record: cluster_job:cortex_querier_request_duration_seconds_count:sum_rate - - expr: histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, job, route)) - record: cluster_job_route:cortex_querier_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, job, route)) - record: cluster_job_route:cortex_querier_request_duration_seconds:50quantile - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - job, route) / sum(rate(cortex_querier_request_duration_seconds_count[1m])) - by (cluster, job, route) - record: cluster_job_route:cortex_querier_request_duration_seconds:avg - - expr: sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, - cluster, job, route) - record: cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - job, route) - record: cluster_job_route:cortex_querier_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, - job, route) - record: cluster_job_route:cortex_querier_request_duration_seconds_count:sum_rate - - expr: histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, namespace, job, route)) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) - by (le, cluster, namespace, job, route)) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds:50quantile - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by 
(cluster, - namespace, job, route) / sum(rate(cortex_querier_request_duration_seconds_count[1m])) - by (cluster, namespace, job, route) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds:avg - - expr: sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, - cluster, namespace, job, route) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, - namespace, job, route) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, - namespace, job, route) - record: cluster_namespace_job_route:cortex_querier_request_duration_seconds_count:sum_rate - - name: mimir_storage - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_kv_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_kv_request_duration_seconds:50quantile - - expr: sum(rate(cortex_kv_request_duration_seconds_sum[1m])) by (cluster, job) - / sum(rate(cortex_kv_request_duration_seconds_count[1m])) by (cluster, job) - record: cluster_job:cortex_kv_request_duration_seconds:avg - - expr: sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) by (le, cluster, - job) - record: cluster_job:cortex_kv_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_kv_request_duration_seconds_sum[1m])) by (cluster, job) - record: cluster_job:cortex_kv_request_duration_seconds_sum:sum_rate - - expr: sum(rate(cortex_kv_request_duration_seconds_count[1m])) by (cluster, job) - record: cluster_job:cortex_kv_request_duration_seconds_count:sum_rate - - name: mimir_queries - rules: - - expr: histogram_quantile(0.99, 
sum(rate(cortex_query_frontend_retries_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_query_frontend_retries:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_query_frontend_retries_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_query_frontend_retries:50quantile - - expr: sum(rate(cortex_query_frontend_retries_sum[1m])) by (cluster, job) / sum(rate(cortex_query_frontend_retries_count[1m])) - by (cluster, job) - record: cluster_job:cortex_query_frontend_retries:avg - - expr: sum(rate(cortex_query_frontend_retries_bucket[1m])) by (le, cluster, job) - record: cluster_job:cortex_query_frontend_retries_bucket:sum_rate - - expr: sum(rate(cortex_query_frontend_retries_sum[1m])) by (cluster, job) - record: cluster_job:cortex_query_frontend_retries_sum:sum_rate - - expr: sum(rate(cortex_query_frontend_retries_count[1m])) by (cluster, job) - record: cluster_job:cortex_query_frontend_retries_count:sum_rate - - expr: histogram_quantile(0.99, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_query_frontend_queue_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_query_frontend_queue_duration_seconds:50quantile - - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_sum[1m])) by (cluster, - job) / sum(rate(cortex_query_frontend_queue_duration_seconds_count[1m])) by - (cluster, job) - record: cluster_job:cortex_query_frontend_queue_duration_seconds:avg - - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) by - (le, cluster, job) - record: cluster_job:cortex_query_frontend_queue_duration_seconds_bucket:sum_rate - - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_sum[1m])) by (cluster, - job) - record: cluster_job:cortex_query_frontend_queue_duration_seconds_sum:sum_rate - - expr: 
sum(rate(cortex_query_frontend_queue_duration_seconds_count[1m])) by (cluster, - job) - record: cluster_job:cortex_query_frontend_queue_duration_seconds_count:sum_rate - - name: mimir_ingester_queries - rules: - - expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_series_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_series:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_series_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_series:50quantile - - expr: sum(rate(cortex_ingester_queried_series_sum[1m])) by (cluster, job) / - sum(rate(cortex_ingester_queried_series_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_series:avg - - expr: sum(rate(cortex_ingester_queried_series_bucket[1m])) by (le, cluster, - job) - record: cluster_job:cortex_ingester_queried_series_bucket:sum_rate - - expr: sum(rate(cortex_ingester_queried_series_sum[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_series_sum:sum_rate - - expr: sum(rate(cortex_ingester_queried_series_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_series_count:sum_rate - - expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_samples_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_samples:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_samples_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_samples:50quantile - - expr: sum(rate(cortex_ingester_queried_samples_sum[1m])) by (cluster, job) / - sum(rate(cortex_ingester_queried_samples_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_samples:avg - - expr: sum(rate(cortex_ingester_queried_samples_bucket[1m])) by (le, cluster, - job) - record: cluster_job:cortex_ingester_queried_samples_bucket:sum_rate - - expr: 
sum(rate(cortex_ingester_queried_samples_sum[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_samples_sum:sum_rate - - expr: sum(rate(cortex_ingester_queried_samples_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_samples_count:sum_rate - - expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_exemplars_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_exemplars:99quantile - - expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_exemplars_bucket[1m])) - by (le, cluster, job)) - record: cluster_job:cortex_ingester_queried_exemplars:50quantile - - expr: sum(rate(cortex_ingester_queried_exemplars_sum[1m])) by (cluster, job) - / sum(rate(cortex_ingester_queried_exemplars_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_exemplars:avg - - expr: sum(rate(cortex_ingester_queried_exemplars_bucket[1m])) by (le, cluster, - job) - record: cluster_job:cortex_ingester_queried_exemplars_bucket:sum_rate - - expr: sum(rate(cortex_ingester_queried_exemplars_sum[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_exemplars_sum:sum_rate - - expr: sum(rate(cortex_ingester_queried_exemplars_count[1m])) by (cluster, job) - record: cluster_job:cortex_ingester_queried_exemplars_count:sum_rate - - name: mimir_received_samples - rules: - - expr: | - sum by (cluster, namespace, job) (rate(cortex_distributor_received_samples_total[5m])) - record: cluster_namespace_job:cortex_distributor_received_samples:rate5m - - name: mimir_exemplars_in - rules: - - expr: | - sum by (cluster, namespace, job) (rate(cortex_distributor_exemplars_in_total[5m])) - record: cluster_namespace_job:cortex_distributor_exemplars_in:rate5m - - name: mimir_received_exemplars - rules: - - expr: | - sum by (cluster, namespace, job) (rate(cortex_distributor_received_exemplars_total[5m])) - record: cluster_namespace_job:cortex_distributor_received_exemplars:rate5m - - name: 
mimir_exemplars_ingested - rules: - - expr: | - sum by (cluster, namespace, job) (rate(cortex_ingester_ingested_exemplars_total[5m])) - record: cluster_namespace_job:cortex_ingester_ingested_exemplars:rate5m - - name: mimir_exemplars_appended - rules: - - expr: | - sum by (cluster, namespace, job) (rate(cortex_ingester_tsdb_exemplar_exemplars_appended_total[5m])) - record: cluster_namespace_job:cortex_ingester_tsdb_exemplar_exemplars_appended:rate5m - - name: mimir_scaling_rules - rules: - - expr: | - # Convenience rule to get the number of replicas for both a deployment and a statefulset. - # Multi-zone deployments are grouped together removing the "zone-X" suffix. - sum by (cluster, namespace, deployment) ( - label_replace( - kube_deployment_spec_replicas, - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - or - sum by (cluster, namespace, deployment) ( - label_replace(kube_statefulset_replicas, "deployment", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?") - ) - record: cluster_namespace_deployment:actual_replicas:count - - expr: | - ceil( - quantile_over_time(0.99, - sum by (cluster, namespace) ( - cluster_namespace_job:cortex_distributor_received_samples:rate5m - )[24h:] - ) - / 240000 - ) - labels: - deployment: distributor - reason: sample_rate - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - sum by (cluster, namespace) (cortex_limits_overrides{limit_name="ingestion_rate"}) - * 0.59999999999999998 / 240000 - ) - labels: - deployment: distributor - reason: sample_rate_limits - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - quantile_over_time(0.99, - sum by (cluster, namespace) ( - cluster_namespace_job:cortex_distributor_received_samples:rate5m - )[24h:] - ) - * 3 / 80000 - ) - labels: - deployment: ingester - 
reason: sample_rate - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - quantile_over_time(0.99, - sum by(cluster, namespace) ( - cortex_ingester_memory_series - )[24h:] - ) - / 1500000 - ) - labels: - deployment: ingester - reason: active_series - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - sum by (cluster, namespace) (cortex_limits_overrides{limit_name="max_global_series_per_user"}) - * 3 * 0.59999999999999998 / 1500000 - ) - labels: - deployment: ingester - reason: active_series_limits - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - sum by (cluster, namespace) (cortex_limits_overrides{limit_name="ingestion_rate"}) - * 0.59999999999999998 / 80000 - ) - labels: - deployment: ingester - reason: sample_rate_limits - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - ceil( - (sum by (cluster, namespace) ( - cortex_ingester_tsdb_storage_blocks_bytes{job=~".+/ingester.*"} - ) / 4) - / - avg by (cluster, namespace) ( - memcached_limit_bytes{job=~".+/memcached"} - ) - ) - labels: - deployment: memcached - reason: active_series - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - sum by (cluster, namespace, pod)(rate(container_cpu_usage_seconds_total[1m])), - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - record: cluster_namespace_deployment:container_cpu_usage_seconds_total:sum_rate - - expr: | - # Convenience rule to get the CPU request for both a deployment and a statefulset. - # Multi-zone deployments are grouped together removing the "zone-X" suffix. 
- # This recording rule is made compatible with the breaking changes introduced in kube-state-metrics v2 - # that remove resource metrics, ref: - # - https://github.com/kubernetes/kube-state-metrics/blob/master/CHANGELOG.md#v200-alpha--2020-09-16 - # - https://github.com/kubernetes/kube-state-metrics/pull/1004 - # - # This is the old expression, compatible with kube-state-metrics < v2.0.0, - # where kube_pod_container_resource_requests_cpu_cores was removed: - ( - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - kube_pod_container_resource_requests_cpu_cores, - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - ) - or - # This expression is compatible with kube-state-metrics >= v1.4.0, - # where kube_pod_container_resource_requests was introduced. - ( - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - kube_pod_container_resource_requests{resource="cpu"}, - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - ) - record: cluster_namespace_deployment:kube_pod_container_resource_requests_cpu_cores:sum - - expr: | - # Jobs should be sized to their CPU usage. - # We do this by comparing 99th percentile usage over the last 24hrs to - # their current provisioned #replicas and resource requests. 
- ceil( - cluster_namespace_deployment:actual_replicas:count - * - quantile_over_time(0.99, cluster_namespace_deployment:container_cpu_usage_seconds_total:sum_rate[24h]) - / - cluster_namespace_deployment:kube_pod_container_resource_requests_cpu_cores:sum - ) - labels: - reason: cpu_usage - record: cluster_namespace_deployment_reason:required_replicas:count - - expr: | - # Convenience rule to get the Memory utilization for both a deployment and a statefulset. - # Multi-zone deployments are grouped together removing the "zone-X" suffix. - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - container_memory_usage_bytes{image!=""}, - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - record: cluster_namespace_deployment:container_memory_usage_bytes:sum - - expr: | - # Convenience rule to get the Memory request for both a deployment and a statefulset. - # Multi-zone deployments are grouped together removing the "zone-X" suffix. 
- # This recording rule is made compatible with the breaking changes introduced in kube-state-metrics v2 - # that remove resource metrics, ref: - # - https://github.com/kubernetes/kube-state-metrics/blob/master/CHANGELOG.md#v200-alpha--2020-09-16 - # - https://github.com/kubernetes/kube-state-metrics/pull/1004 - # - # This is the old expression, compatible with kube-state-metrics < v2.0.0, - # where kube_pod_container_resource_requests_memory_bytes was removed: - ( - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - kube_pod_container_resource_requests_memory_bytes, - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - ) - or - # This expression is compatible with kube-state-metrics >= v1.4.0, - # where kube_pod_container_resource_requests was introduced. - ( - sum by (cluster, namespace, deployment) ( - label_replace( - label_replace( - kube_pod_container_resource_requests{resource="memory"}, - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - ) - record: cluster_namespace_deployment:kube_pod_container_resource_requests_memory_bytes:sum - - expr: | - # Jobs should be sized to their Memory usage. - # We do this by comparing 99th percentile usage over the last 24hrs to - # their current provisioned #replicas and resource requests. 
- ceil( - cluster_namespace_deployment:actual_replicas:count - * - quantile_over_time(0.99, cluster_namespace_deployment:container_memory_usage_bytes:sum[24h]) - / - cluster_namespace_deployment:kube_pod_container_resource_requests_memory_bytes:sum - ) - labels: - reason: memory_usage - record: cluster_namespace_deployment_reason:required_replicas:count - - name: mimir_alertmanager_rules - rules: - - expr: | - sum by (cluster, job, pod) (cortex_alertmanager_alerts) - record: cluster_job_pod:cortex_alertmanager_alerts:sum - - expr: | - sum by (cluster, job, pod) (cortex_alertmanager_silences) - record: cluster_job_pod:cortex_alertmanager_silences:sum - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_alerts_received_total[5m])) - record: cluster_job:cortex_alertmanager_alerts_received_total:rate5m - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_alerts_invalid_total[5m])) - record: cluster_job:cortex_alertmanager_alerts_invalid_total:rate5m - - expr: | - sum by (cluster, job, integration) (rate(cortex_alertmanager_notifications_total[5m])) - record: cluster_job_integration:cortex_alertmanager_notifications_total:rate5m - - expr: | - sum by (cluster, job, integration) (rate(cortex_alertmanager_notifications_failed_total[5m])) - record: cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_state_replication_total[5m])) - record: cluster_job:cortex_alertmanager_state_replication_total:rate5m - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_state_replication_failed_total[5m])) - record: cluster_job:cortex_alertmanager_state_replication_failed_total:rate5m - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_partial_state_merges_total[5m])) - record: cluster_job:cortex_alertmanager_partial_state_merges_total:rate5m - - expr: | - sum by (cluster, job) (rate(cortex_alertmanager_partial_state_merges_failed_total[5m])) - record: 
cluster_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m - - name: mimir_ingester_rules - rules: - - expr: | - sum by(cluster, namespace, pod) (rate(cortex_ingester_ingested_samples_total[1m])) - record: cluster_namespace_pod:cortex_ingester_ingested_samples_total:rate1m + endpoints: + - honorLabels: true + port: http-metrics + scheme: http + selector: + matchLabels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/name: grafana-agent --- apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor @@ -45516,3 +1456,37 @@ spec: app.kubernetes.io/component: mimir-write app.kubernetes.io/instance: mimir-read-write-mode app.kubernetes.io/name: mimir +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.40.3 + helm.sh/chart: grafana-agent-0.37.0 + name: grafana-agent + namespace: monitoring-system +spec: + rules: + - host: grafana-agent.localhost + http: + paths: + - backend: + service: + name: grafana-agent + port: + number: 80 + path: / + pathType: Prefix + - host: agent.localhost + http: + paths: + - backend: + service: + name: grafana-agent + port: + number: 80 + path: / + pathType: Prefix diff --git a/kubernetes/read-write-mode/metrics/kustomization.yaml b/kubernetes/read-write-mode/metrics/kustomization.yaml index 4fe5ebc0..30c94eb0 100644 --- a/kubernetes/read-write-mode/metrics/kustomization.yaml +++ b/kubernetes/read-write-mode/metrics/kustomization.yaml @@ -11,10 +11,8 @@ kind: Kustomization # https://grafana.com/docs/mimir/latest/references/architecture/deployment-modes/#read-write-mode resources: +- ../../common/grafana-agent - mimir -- ../../../monitoring-mixins/agent-flow-mixin/deploy -- ../../../monitoring-mixins/go-runtime-mixin/deploy -- ../../../monitoring-mixins/mimir-mixin/deploy secretGenerator: - name: mimir-env @@ -25,19 +23,10 @@ secretGenerator: 
configMapGenerator: - name: agent-config namespace: monitoring-system - options: - disableNameSuffixHash: true + behavior: replace files: - configs/config.river -- name: grafana-datasources - namespace: monitoring-system - options: - labels: - grafana_datasource: "1" - files: - - datasources.yaml=configs/grafana-datasources-mimir.yaml - - name: mimir-config namespace: monitoring-system files: diff --git a/monitoring-mixins/Makefile b/monitoring-mixins/Makefile new file mode 100644 index 00000000..b2e20df8 --- /dev/null +++ b/monitoring-mixins/Makefile @@ -0,0 +1,7 @@ +include ../.bingo/Variables.mk + +all: build + +.PHONY: build +build: $(KUSTOMIZE) + @$(KUSTOMIZE) build . > /dev/null diff --git a/monitoring-mixins/README.md b/monitoring-mixins/README.md index 16f45e40..3fa4b09e 100644 --- a/monitoring-mixins/README.md +++ b/monitoring-mixins/README.md @@ -1,5 +1,7 @@ # Monitoring Mixins +> Provisioning Grafana `dashboards` Prometheus `rules` and `alerts` + Mixins are written in [jsonnet](https://jsonnet.org/), and are typically installed and updated with [jsonnet-bundler](https://github.com/jsonnet-bundler/jsonnet-bundler). 
For more advanced uses of mixins, see diff --git a/monitoring-mixins/kustomization.yaml b/monitoring-mixins/kustomization.yaml new file mode 100644 index 00000000..89e886ca --- /dev/null +++ b/monitoring-mixins/kustomization.yaml @@ -0,0 +1,20 @@ +# ============================================================================ # +# Monitoring Mixins +# ============================================================================ # +# Provisioning Grafana dashboards Prometheus rules and alerts + +# ---------------------------------------------------- +# apiVersion and kind of Kustomization +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: +- agent-flow-mixin/deploy +- go-runtime-mixin/deploy +# - kubernetes-mixin/deploy +- loki-mixin/deploy +- memcached-mixin/deploy +- mimir-mixin/deploy +- pyroscope-mixin/deploy +# - tempo-mixin/deploy